mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 16:56:24 +02:00
580 lines
No EOL
71 KiB
HTML
Executable file
580 lines
No EOL
71 KiB
HTML
Executable file
<!DOCTYPE html>
|
||
|
||
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' && window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val => localStorage.setItem('darkMode', val))">
|
||
<head>
|
||
<!-- Encoding is declared first so it is known before any other metadata is parsed; the viewport is declared exactly once. -->
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||
<title>LLM Routing | Arch Docs v0.3.15</title>
|
||
<meta content="LLM Routing | Arch Docs v0.3.15" property="og:title"/>
|
||
<meta content="LLM Routing | Arch Docs v0.3.15" name="twitter:title"/>
|
||
<link href="../_static/pygments.css?v=466e7b45" rel="stylesheet" type="text/css"/>
|
||
<link href="../_static/theme.css?v=42baaae4" rel="stylesheet" type="text/css"/>
|
||
<link href="../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||
<link href="../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
|
||
<link href="../_static/awesome-sphinx-design.css?v=15e0fffa" rel="stylesheet" type="text/css"/>
|
||
<link href="./docs/guides/llm_router.html" rel="canonical"/>
|
||
<link href="../_static/favicon.ico" rel="icon"/>
|
||
<link href="../search.html" rel="search" title="Search"/>
|
||
<link href="observability/observability.html" rel="next" title="Observability"/>
|
||
<link href="function_calling.html" rel="prev" title="Function Calling"/>
|
||
<script>
// Prevent a flash of the wrong theme: resolve the colour scheme synchronously,
// before the body is rendered.
//
// An explicit stored choice ('dark' or 'light') always wins. The OS-level
// preference is consulted only when nothing is stored yet or the stored value
// is 'system' -- the same rule the :class binding on the <html> element applies.
const userPreference = localStorage.getItem('darkMode');
let mode;
if (
  userPreference === 'dark' ||
  ((!userPreference || userPreference === 'system') &&
    window.matchMedia('(prefers-color-scheme: dark)').matches)
) {
  mode = 'dark';
  document.documentElement.classList.add('dark');
} else {
  mode = 'light';
}
// First visit: persist the resolved mode so the x-data state on <html>,
// which reads this same key, starts from a concrete value.
if (!userPreference) {localStorage.setItem('darkMode', mode)}
</script>
|
||
</head>
|
||
<body :class="{ 'overflow-hidden': showSidebar }" class="min-h-screen font-sans antialiased bg-background text-foreground" x-data="{ showSidebar: false, showScrollTop: false }">
|
||
<div @click.self="showSidebar = false" class="fixed inset-0 z-50 overflow-hidden bg-background/80 backdrop-blur-sm md:hidden" x-cloak="" x-show="showSidebar"></div><div class="relative flex flex-col min-h-screen" id="page"><a class="absolute top-0 left-0 z-[100] block bg-background p-4 text-xl transition -translate-x-full opacity-0 focus:translate-x-0 focus:opacity-100" href="#content">
|
||
Skip to content
|
||
</a><header class="sticky top-0 z-40 w-full border-b shadow-sm border-border supports-backdrop-blur:bg-background/60 bg-background/95 backdrop-blur"><div class="container flex items-center h-14">
|
||
<div class="hidden mr-4 md:flex">
|
||
<a class="flex items-center mr-6" href="../index.html">
|
||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.3.15</span>
|
||
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
|
||
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
|
||
</svg>
|
||
<span class="sr-only">Toggle navigation menu</span>
|
||
</button>
|
||
<div class="flex items-center justify-between flex-1 space-x-2 sm:space-x-4 md:justify-end">
|
||
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../search.html" class="relative flex items-center group" id="searchbox" method="get">
|
||
<input aria-label="Search the docs" class="inline-flex items-center font-medium transition-colors bg-transparent focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 ring-offset-background border border-input hover:bg-accent focus:bg-accent hover:text-accent-foreground focus:text-accent-foreground hover:placeholder-accent-foreground py-2 px-4 relative h-9 w-full justify-start rounded-[0.5rem] text-sm text-muted-foreground sm:pr-12 md:w-40 lg:w-64" id="search-input" name="q" placeholder="Search ..." type="search" x-ref="search"/>
|
||
<kbd class="pointer-events-none absolute right-1.5 top-2 hidden h-5 select-none text-muted-foreground items-center gap-1 rounded border border-border bg-muted px-1.5 font-mono text-[10px] font-medium opacity-100 sm:flex group-hover:bg-accent group-hover:text-accent-foreground">
|
||
<span class="text-xs">⌘</span>
|
||
K
|
||
</kbd>
|
||
</form>
|
||
</div>
|
||
<nav class="flex items-center space-x-1">
|
||
<!-- Icon-only link: aria-label supplies the accessible name (title alone is not reliably announced); the SVG is decorative. -->
<a aria-label="Visit repository on GitHub" href="https://github.com/katanemo/arch" rel="noopener nofollow" title="Visit repository on GitHub">
<div class="inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md disabled:opacity-50 disabled:pointer-events-none hover:bg-accent hover:text-accent-foreground h-9 w-9">
<svg aria-hidden="true" fill="currentColor" height="26px" style="margin-top:-2px;display:inline" viewbox="0 0 45 44" xmlns="http://www.w3.org/2000/svg"><path clip-rule="evenodd" d="M22.477.927C10.485.927.76 10.65.76 22.647c0 9.596 6.223 17.736 14.853 20.608 1.087.2 1.483-.47 1.483-1.047 0-.516-.019-1.881-.03-3.693-6.04 1.312-7.315-2.912-7.315-2.912-.988-2.51-2.412-3.178-2.412-3.178-1.972-1.346.149-1.32.149-1.32 2.18.154 3.327 2.24 3.327 2.24 1.937 3.318 5.084 2.36 6.321 1.803.197-1.403.759-2.36 1.379-2.903-4.823-.548-9.894-2.412-9.894-10.734 0-2.37.847-4.31 2.236-5.828-.224-.55-.969-2.759.214-5.748 0 0 1.822-.584 5.972 2.226 1.732-.482 3.59-.722 5.437-.732 1.845.01 3.703.25 5.437.732 4.147-2.81 5.967-2.226 5.967-2.226 1.185 2.99.44 5.198.217 5.748 1.392 1.517 2.232 3.457 2.232 5.828 0 8.344-5.078 10.18-9.916 10.717.779.67 1.474 1.996 1.474 4.021 0 2.904-.027 5.247-.027 5.96 0 .58.392 1.256 1.493 1.044C37.981 40.375 44.2 32.24 44.2 22.647c0-11.996-9.726-21.72-21.722-21.72" fill="currentColor" fill-rule="evenodd"></path></svg>
</div>
</a>
|
||
<button @click="darkMode = darkMode === 'light' ? 'dark' : 'light'" aria-label="Color theme switcher" class="relative inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md hover:bg-accent hover:text-accent-foreground h-9 w-9" type="button">
|
||
<svg class="absolute transition-all scale-100 rotate-0 dark:-rotate-90 dark:scale-0" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||
<path d="M480 685q45.456 0 77.228-31.772Q589 621.456 589 576q0-45.456-31.772-77.228Q525.456 467 480 467q-45.456 0-77.228 31.772Q371 530.544 371 576q0 45.456 31.772 77.228Q434.544 685 480 685Zm0 91q-83 0-141.5-58.5T280 576q0-83 58.5-141.5T480 376q83 0 141.5 58.5T680 576q0 83-58.5 141.5T480 776ZM80 621.5q-19.152 0-32.326-13.174T34.5 576q0-19.152 13.174-32.326T80 530.5h80q19.152 0 32.326 13.174T205.5 576q0 19.152-13.174 32.326T160 621.5H80Zm720 0q-19.152 0-32.326-13.174T754.5 576q0-19.152 13.174-32.326T800 530.5h80q19.152 0 32.326 13.174T925.5 576q0 19.152-13.174 32.326T880 621.5h-80Zm-320-320q-19.152 0-32.326-13.174T434.5 256v-80q0-19.152 13.174-32.326T480 130.5q19.152 0 32.326 13.174T525.5 176v80q0 19.152-13.174 32.326T480 301.5Zm0 720q-19.152 0-32.326-13.17Q434.5 995.152 434.5 976v-80q0-19.152 13.174-32.326T480 850.5q19.152 0 32.326 13.174T525.5 896v80q0 19.152-13.174 32.33-13.174 13.17-32.326 13.17ZM222.174 382.065l-43-42Q165.5 327.391 166 308.239t13.174-33.065q13.435-13.674 32.587-13.674t32.065 13.674l42.239 43q12.674 13.435 12.555 31.706-.12 18.272-12.555 31.946-12.674 13.674-31.445 13.413-18.772-.261-32.446-13.174Zm494 494.761-42.239-43q-12.674-13.435-12.674-32.087t12.674-31.565Q686.609 756.5 705.38 757q18.772.5 32.446 13.174l43 41.761Q794.5 824.609 794 843.761t-13.174 33.065Q767.391 890.5 748.239 890.5t-32.065-13.674Zm-42-494.761Q660.5 369.391 661 350.62q.5-18.772 13.174-32.446l41.761-43Q728.609 261.5 747.761 262t33.065 13.174q13.674 13.435 13.674 32.587t-13.674 32.065l-43 42.239q-13.435 12.674-31.706 12.555-18.272-.12-31.946-12.555Zm-495 494.761Q165.5 863.391 165.5 844.239t13.674-32.065l43-42.239q13.435-12.674 32.087-12.674t31.565 12.674Q299.5 782.609 299 801.38q-.5 18.772-13.174 32.446l-41.761 43Q231.391 890.5 212.239 890t-33.065-13.174ZM480 576Z"></path>
|
||
</svg>
|
||
<svg class="absolute transition-all scale-0 rotate-90 dark:rotate-0 dark:scale-100" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||
<path d="M480 936q-151 0-255.5-104.5T120 576q0-138 90-239.5T440 218q25-3 39 18t-1 44q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q21-14 43-1.5t19 39.5q-14 138-117.5 229T480 936Zm0-80q88 0 158-48.5T740 681q-20 5-40 8t-40 3q-123 0-209.5-86.5T364 396q0-20 3-40t8-40q-78 32-126.5 102T200 576q0 116 82 198t198 82Zm-10-270Z"></path>
|
||
</svg>
|
||
</button>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</header>
|
||
<div class="flex-1"><div class="container flex-1 items-start md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full shrink-0 overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
|
||
<a class="!justify-start text-sm md:!hidden bg-background" href="../index.html">
|
||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.3.15</span>
|
||
</a>
|
||
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0 h-[calc(100vh-8rem)] md:h-auto">
|
||
<div class="overflow-y-auto h-full w-full relative pr-6">
|
||
|
||
<!-- Analytics: the first script loads gtag.js asynchronously from googletagmanager.com; the inline script below queues commands for it. -->
<script async="" src="https://www.googletagmanager.com/gtag/js?id=G-K2LXXSX6HB"></script>
|
||
<script>
|
||
// Reuse an existing command queue if one is already on window; otherwise start an empty one.
window.dataLayer = window.dataLayer || [];
|
||
// gtag() pushes its raw `arguments` object (not a copied array) onto the queue.
function gtag(){dataLayer.push(arguments);}
|
||
// Queue a 'js' command carrying the current Date.
gtag('js', new Date());
|
||
|
||
// Queue a 'config' command for the same ID that appears in the loader URL above.
gtag('config', 'G-K2LXXSX6HB');
|
||
</script>
|
||
<nav class="table w-full min-w-full my-6 lg:my-8">
|
||
<p class="caption" role="heading"><span class="caption-text">Get Started</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../get_started/overview.html">Overview</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../get_started/intro_to_arch.html">Intro to Arch</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../get_started/quickstart.html">Quickstart</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../get_started/quickstart.html#next-steps">Next Steps</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Concepts</span></p>
|
||
<ul>
|
||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/tech_overview/tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||
<ul class="current">
|
||
<li class="toctree-l1"><a class="reference internal" href="prompt_guard.html">Prompt Guard</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="agent_routing.html">Agent Routing and Hand Off</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="function_calling.html">Function Calling</a></li>
|
||
<li class="toctree-l1 current"><a class="current reference internal" href="#">LLM Routing</a></li>
|
||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||
<li class="toctree-l2"><a class="reference internal" href="observability/tracing.html">Tracing</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="observability/monitoring.html">Monitoring</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="observability/access_logging.html">Access Logging</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Build with Arch</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/agent.html">Agentic Apps</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/rag.html">RAG Apps</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/multi_turn.html">Multi-Turn</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
<!-- Closes the mobile sidebar. The visually hidden label gives this icon-only button an accessible name; the SVG is decorative. -->
<button @click="showSidebar = false" class="absolute md:hidden right-4 top-4 rounded-sm opacity-70 transition-opacity hover:opacity-100" type="button">
<svg aria-hidden="true" class="h-4 w-4" fill="currentColor" height="24" stroke="none" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
<path d="M480 632 284 828q-11 11-28 11t-28-11q-11-11-11-28t11-28l196-196-196-196q-11-11-11-28t11-28q11-11 28-11t28 11l196 196 196-196q11-11 28-11t28 11q11 11 11 28t-11 28L536 576l196 196q11 11 11 28t-11 28q-11 11-28 11t-28-11L480 632Z"></path>
</svg>
<span class="sr-only">Close navigation menu</span>
</button>
|
||
</aside>
|
||
<main class="relative py-6 lg:gap-10 lg:py-8 xl:grid xl:grid-cols-[1fr_300px]">
|
||
<div class="w-full min-w-0 mx-auto">
|
||
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
|
||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../index.html">
|
||
<span class="hidden md:inline">Arch Docs v0.3.15</span>
|
||
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
|
||
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
|
||
</svg>
|
||
</a>
|
||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">LLM Routing</span>
|
||
</nav>
|
||
<div id="content" role="main">
|
||
<section id="llm-routing">
|
||
<span id="llm-router"></span><h1>LLM Routing<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#llm-routing"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||
<p>With the rapid proliferation of large language models (LLMs) — each optimized for different strengths, styles, or latency/cost profiles — routing has become an essential technique for operationalizing the use of different models.</p>
|
||
<p>Arch provides three distinct routing approaches to meet different use cases:</p>
|
||
<ol class="arabic simple">
|
||
<li><p><strong>Model-based Routing</strong>: Direct routing to specific models using provider/model names</p></li>
|
||
<li><p><strong>Alias-based Routing</strong>: Semantic routing using custom aliases that map to underlying models</p></li>
|
||
<li><p><strong>Preference-aligned Routing</strong>: Intelligent routing using the Arch-Router model based on context and user-defined preferences</p></li>
|
||
</ol>
|
||
<p>This enables optimal performance, cost efficiency, and response quality by matching requests with the most suitable model from your available LLM fleet.</p>
|
||
<section id="routing-methods">
|
||
<h2>Routing Methods<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#routing-methods" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#routing-methods'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<section id="model-based-routing">
|
||
<h3>Model-based Routing<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-based-routing" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#model-based-routing'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||
<p>Direct routing allows you to specify exact provider and model combinations using the format <code class="docutils literal notranslate"><span class="pre">provider/model-name</span></code>:</p>
|
||
<ul class="simple">
|
||
<li><p>Use provider-specific names like <code class="docutils literal notranslate"><span class="pre">openai/gpt-4o</span></code> or <code class="docutils literal notranslate"><span class="pre">anthropic/claude-3-5-sonnet-20241022</span></code></p></li>
|
||
<li><p>Provides full control and transparency over which model handles each request</p></li>
|
||
<li><p>Ideal for production workloads where you want predictable routing behavior</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="alias-based-routing">
|
||
<h3>Alias-based Routing<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#alias-based-routing" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#alias-based-routing'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||
<p>Alias-based routing lets you create semantic model names that decouple your application from specific providers:</p>
|
||
<ul class="simple">
|
||
<li><p>Use meaningful names like <code class="docutils literal notranslate"><span class="pre">fast-model</span></code>, <code class="docutils literal notranslate"><span class="pre">reasoning-model</span></code>, or <code class="docutils literal notranslate"><span class="pre">arch.summarize.v1</span></code> (see <a class="reference internal" href="../concepts/llm_providers/model_aliases.html#model-aliases"><span class="std std-ref">Model Aliases</span></a>)</p></li>
|
||
<li><p>Maps semantic names to underlying provider models for easier experimentation and provider switching</p></li>
|
||
<li><p>Ideal for applications that want abstraction from specific model names while maintaining control</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="preference-aligned-routing-arch-router">
|
||
<span id="preference-aligned-routing"></span><h3>Preference-aligned Routing (Arch-Router)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#preference-aligned-routing-arch-router" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#preference-aligned-routing-arch-router'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||
<p>Traditional LLM routing approaches face significant limitations: they evaluate performance using benchmarks that often fail to capture human preferences, select from fixed model pools, and operate as “black boxes” without practical mechanisms for encoding user preferences.</p>
|
||
<p>Arch’s preference-aligned routing addresses these challenges by applying a fundamental engineering principle: decoupling. The framework separates route selection (matching queries to human-readable policies) from model assignment (mapping policies to specific LLMs). This separation allows you to define routing policies using descriptive labels like <code class="docutils literal notranslate"><span class="pre">Domain:</span> <span class="pre">'finance',</span> <span class="pre">Action:</span> <span class="pre">'analyze_earnings_report'</span></code> rather than cryptic identifiers, while independently configuring which models handle each policy.</p>
|
||
<p>The <a class="reference external" href="https://huggingface.co/katanemo/Arch-Router-1.5B" rel="nofollow noopener">Arch-Router<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> model automatically selects the most appropriate LLM based on:</p>
|
||
<ul class="simple">
|
||
<li><p>Domain Analysis: Identifies the subject matter (e.g., legal, healthcare, programming)</p></li>
|
||
<li><p>Action Classification: Determines the type of operation (e.g., summarization, code generation, translation)</p></li>
|
||
<li><p>User-Defined Preferences: Maps domains and actions to preferred models using transparent, configurable routing decisions</p></li>
|
||
<li><p>Human Preference Alignment: Uses domain-action mappings that capture subjective evaluation criteria, ensuring routing aligns with real-world user needs rather than just benchmark scores</p></li>
|
||
</ul>
|
||
<p>This approach supports seamlessly adding new models without retraining and is ideal for dynamic, context-aware routing that adapts to request content and intent.</p>
|
||
</section>
|
||
</section>
|
||
<section id="model-based-routing-workflow">
|
||
<h2>Model-based Routing Workflow<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-based-routing-workflow" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#model-based-routing-workflow'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p>For direct model routing, the process is straightforward:</p>
|
||
<ol class="arabic">
|
||
<li><p><strong>Client Request</strong></p>
|
||
<blockquote>
|
||
<div><p>The client specifies the exact model using provider/model format (<code class="docutils literal notranslate"><span class="pre">openai/gpt-4o</span></code>).</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Provider Validation</strong></p>
|
||
<blockquote>
|
||
<div><p>Arch validates that the specified provider and model are configured and available.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Direct Routing</strong></p>
|
||
<blockquote>
|
||
<div><p>The request is sent directly to the specified model without analysis or decision-making.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Response Handling</strong></p>
|
||
<blockquote>
|
||
<div><p>The response is returned to the client with optional metadata about the routing decision.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
<section id="alias-based-routing-workflow">
|
||
<h2>Alias-based Routing Workflow<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#alias-based-routing-workflow" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#alias-based-routing-workflow'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p>For alias-based routing, the process includes name resolution:</p>
|
||
<ol class="arabic">
|
||
<li><p><strong>Client Request</strong></p>
|
||
<blockquote>
|
||
<div><p>The client specifies a semantic alias name (<code class="docutils literal notranslate"><span class="pre">reasoning-model</span></code>).</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Alias Resolution</strong></p>
|
||
<blockquote>
|
||
<div><p>Arch resolves the alias to the actual provider/model name based on configuration.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Model Selection</strong></p>
|
||
<blockquote>
|
||
<div><p>If the alias maps to multiple models, Arch selects one based on availability and load balancing.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Request Forwarding</strong></p>
|
||
<blockquote>
|
||
<div><p>The request is forwarded to the resolved model.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Response Handling</strong></p>
|
||
<blockquote>
|
||
<div><p>The response is returned with optional metadata about the alias resolution.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
<section id="preference-aligned-routing-workflow-arch-router">
|
||
<span id="preference-aligned-routing-workflow"></span><h2>Preference-aligned Routing Workflow (Arch-Router)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#preference-aligned-routing-workflow-arch-router" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#preference-aligned-routing-workflow-arch-router'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p>For preference-aligned dynamic routing, the process involves intelligent analysis:</p>
|
||
<ol class="arabic">
|
||
<li><p><strong>Prompt Analysis</strong></p>
|
||
<blockquote>
|
||
<div><p>When a user submits a prompt without specifying a model, the Arch-Router analyzes it to determine the domain (subject matter) and action (type of operation requested).</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Model Selection</strong></p>
|
||
<blockquote>
|
||
<div><p>Based on the analyzed intent and your configured routing preferences, Arch-Router selects the most appropriate model from your available LLM fleet.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Request Forwarding</strong></p>
|
||
<blockquote>
|
||
<div><p>Once the optimal model is identified, the gateway forwards the original prompt to the selected LLM endpoint. The routing decision is transparent and can be logged for monitoring and optimization purposes.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><strong>Response Handling</strong></p>
|
||
<blockquote>
|
||
<div><p>After the selected model processes the request, the response is returned through the gateway. The gateway can optionally add routing metadata or performance metrics to help you understand and optimize your routing decisions.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
<section id="id1">
|
||
<h2>Arch-Router<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#id1'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p>The <a class="reference external" href="https://huggingface.co/katanemo/Arch-Router-1.5B" rel="nofollow noopener">Arch-Router<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> is a state-of-the-art <strong>preference-based routing model</strong> specifically designed to address the limitations of traditional LLM routing. This compact 1.5B model delivers production-ready performance with low latency and high accuracy while solving key routing challenges.</p>
|
||
<p><strong>Addressing Traditional Routing Limitations:</strong></p>
|
||
<p><strong>Human Preference Alignment</strong>
Unlike benchmark-driven approaches, Arch-Router learns to match queries with human preferences by using domain-action mappings that capture subjective evaluation criteria, ensuring routing decisions align with real-world user needs.</p>
|
||
<p><strong>Flexible Model Integration</strong>
The system supports seamlessly adding new models for routing without requiring retraining or architectural modifications, enabling dynamic adaptation to evolving model landscapes.</p>
|
||
<p><strong>Preference-Encoded Routing</strong>
Provides a practical mechanism to encode user preferences through domain-action mappings, offering transparent and controllable routing decisions that can be customized for specific use cases.</p>
|
||
<p>To support effective routing, Arch-Router introduces two key concepts:</p>
|
||
<ul class="simple">
|
||
<li><p><strong>Domain</strong> – the high-level thematic category or subject matter of a request (e.g., legal, healthcare, programming).</p></li>
|
||
<li><p><strong>Action</strong> – the specific type of operation the user wants performed (e.g., summarization, code generation, appointment booking, translation).</p></li>
|
||
</ul>
|
||
<p>Both domain and action configs are associated with preferred models or model variants. At inference time, Arch-Router analyzes the incoming prompt to infer its domain and action using semantic similarity, task indicators, and contextual cues. It then applies the user-defined routing preferences to select the model best suited to handle the request.</p>
|
||
<p>In summary, Arch-Router demonstrates:</p>
|
||
<ul class="simple">
|
||
<li><p><strong>Structured Preference Routing</strong>: Aligns each prompt with model strengths using explicit domain–action mappings.</p></li>
|
||
<li><p><strong>Transparent and Controllable</strong>: Makes routing decisions transparent and configurable, empowering users to customize system behavior.</p></li>
|
||
<li><p><strong>Flexible and Adaptive</strong>: Supports evolving user needs, model updates, and new domains/actions without retraining the router.</p></li>
|
||
<li><p><strong>Production-Ready Performance</strong>: Optimized for low-latency, high-throughput applications in multi-model environments.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="implementing-routing">
|
||
<h2>Implementing Routing<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#implementing-routing" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#implementing-routing'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p><strong>Model-based Routing</strong></p>
|
||
<p>For direct model routing, configure your LLM providers with specific provider/model names:</p>
|
||
<div class="literal-block-wrapper docutils container" id="id3">
|
||
<div class="code-block-caption"><span class="caption-text">Model-based Routing Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">listeners</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="nt">egress_traffic</span><span class="p">:</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">12000</span>
|
||
</span><span id="line-5"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||
</span><span id="line-6"><span class="w"> </span><span class="nt">timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">30s</span>
|
||
</span><span id="line-7">
|
||
</span><span id="line-8"><span class="nt">llm_providers</span><span class="p">:</span>
|
||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||
</span><span id="line-10"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-11"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||
</span><span id="line-12">
|
||
</span><span id="line-13"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||
</span><span id="line-14"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-15">
|
||
</span><span id="line-16"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||
</span><span id="line-17"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Clients specify exact models:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Direct provider/model specification</span>
</span><span id="line-2"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
</span><span id="line-3">    <span class="n">model</span><span class="o">=</span><span class="s2">"openai/gpt-4o-mini"</span><span class="p">,</span>
</span><span id="line-4">    <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Hello!"</span><span class="p">}]</span>
</span><span id="line-5"><span class="p">)</span>
</span><span id="line-6">
</span><span id="line-7"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
</span><span id="line-8">    <span class="n">model</span><span class="o">=</span><span class="s2">"anthropic/claude-3-5-sonnet-20241022"</span><span class="p">,</span>
</span><span id="line-9">    <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Write a story"</span><span class="p">}]</span>
</span><span id="line-10"><span class="p">)</span>
</span></code></pre></div>
|
||
</div>
|
||
<p><strong>Alias-based Routing</strong></p>
|
||
<p>Configure semantic aliases that map to underlying models:</p>
|
||
<div class="literal-block-wrapper docutils container" id="id4">
|
||
<div class="code-block-caption"><span class="caption-text">Alias-based Routing Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">listeners</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="nt">egress_traffic</span><span class="p">:</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">12000</span>
|
||
</span><span id="line-5"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||
</span><span id="line-6"><span class="w"> </span><span class="nt">timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">30s</span>
|
||
</span><span id="line-7">
|
||
</span><span id="line-8"><span class="nt">llm_providers</span><span class="p">:</span>
|
||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||
</span><span id="line-10"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-11">
|
||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-14">
|
||
</span><span id="line-15"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||
</span><span id="line-16"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||
</span><span id="line-17">
|
||
</span><span id="line-18"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-19"><span class="w"> </span><span class="c1"># Model aliases - friendly names that map to actual provider names</span>
|
||
</span><span id="line-20"><span class="w"> </span><span class="nt">fast-model</span><span class="p">:</span>
|
||
</span><span id="line-21"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-22">
|
||
</span><span id="line-23"><span class="w"> </span><span class="nt">reasoning-model</span><span class="p">:</span>
|
||
</span><span id="line-24"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||
</span><span id="line-25">
|
||
</span><span id="line-26"><span class="w"> </span><span class="nt">creative-model</span><span class="p">:</span>
|
||
</span><span id="line-27"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Clients use semantic names:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Using semantic aliases</span>
</span><span id="line-2"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
</span><span id="line-3">    <span class="n">model</span><span class="o">=</span><span class="s2">"fast-model"</span><span class="p">,</span> <span class="c1"># Routes to best available fast model</span>
</span><span id="line-4">    <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Quick summary please"</span><span class="p">}]</span>
</span><span id="line-5"><span class="p">)</span>
</span><span id="line-6">
</span><span id="line-7"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
</span><span id="line-8">    <span class="n">model</span><span class="o">=</span><span class="s2">"reasoning-model"</span><span class="p">,</span> <span class="c1"># Routes to best reasoning model</span>
</span><span id="line-9">    <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Solve this complex problem"</span><span class="p">}]</span>
</span><span id="line-10"><span class="p">)</span>
</span></code></pre></div>
|
||
</div>
|
||
<p><strong>Preference-aligned Routing (Arch-Router)</strong></p>
|
||
<p>To configure preference-aligned dynamic routing, you need to define routing preferences that map domains and actions to specific models:</p>
|
||
<div class="literal-block-wrapper docutils container" id="id5">
|
||
<div class="code-block-caption"><span class="caption-text">Preference-Aligned Dynamic Routing Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id5"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">listeners</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="nt">egress_traffic</span><span class="p">:</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">12000</span>
|
||
</span><span id="line-5"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||
</span><span id="line-6"><span class="w"> </span><span class="nt">timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">30s</span>
|
||
</span><span id="line-7">
|
||
</span><span id="line-8"><span class="nt">llm_providers</span><span class="p">:</span>
|
||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||
</span><span id="line-10"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-11"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||
</span><span id="line-12">
|
||
</span><span id="line-13"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||
</span><span id="line-14"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-15"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||
</span><span id="line-16"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">code understanding</span>
|
||
</span><span id="line-17"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">understand and explain existing code snippets, functions, or libraries</span>
|
||
</span><span id="line-18"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">complex reasoning</span>
|
||
</span><span id="line-19"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">deep analysis, mathematical problem solving, and logical reasoning</span>
|
||
</span><span id="line-20">
|
||
</span><span id="line-21"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||
</span><span id="line-22"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||
</span><span id="line-23"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||
</span><span id="line-24"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative writing</span>
|
||
</span><span id="line-25"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative content generation, storytelling, and writing assistance</span>
|
||
</span><span id="line-26"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">code generation</span>
|
||
</span><span id="line-27"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">generating new code snippets, functions, or boilerplate based on user prompts</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Clients can let the router decide or use aliases:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Let Arch-Router choose based on content</span>
</span><span id="line-2"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
</span><span id="line-3">    <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Write a creative story about space exploration"</span><span class="p">}]</span>
</span><span id="line-4">    <span class="c1"># No model specified - router will analyze and choose claude-3-5-sonnet-20241022</span>
</span><span id="line-5"><span class="p">)</span>
</span></code></pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="combining-routing-methods">
|
||
<h2>Combining Routing Methods<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#combining-routing-methods" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#combining-routing-methods'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p>You can combine static model selection with dynamic routing preferences for maximum flexibility:</p>
|
||
<div class="literal-block-wrapper docutils container" id="id6">
|
||
<div class="code-block-caption"><span class="caption-text">Hybrid Routing Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id6"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||
</span><span id="line-5">
|
||
</span><span id="line-6"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||
</span><span id="line-7"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-8"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">complex_reasoning</span>
|
||
</span><span id="line-10"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">deep analysis and complex problem solving</span>
|
||
</span><span id="line-11">
|
||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||
</span><span id="line-14"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||
</span><span id="line-15"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative_tasks</span>
|
||
</span><span id="line-16"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative writing and content generation</span>
|
||
</span><span id="line-17">
|
||
</span><span id="line-18"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-19"><span class="w"> </span><span class="c1"># Model aliases - friendly names that map to actual provider names</span>
|
||
</span><span id="line-20"><span class="w"> </span><span class="nt">fast-model</span><span class="p">:</span>
|
||
</span><span id="line-21"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-22">
|
||
</span><span id="line-23"><span class="w"> </span><span class="nt">reasoning-model</span><span class="p">:</span>
|
||
</span><span id="line-24"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||
</span><span id="line-25">
|
||
</span><span id="line-26"><span class="w"> </span><span class="c1"># Aliases that can also participate in dynamic routing</span>
|
||
</span><span id="line-27"><span class="w"> </span><span class="nt">creative-model</span><span class="p">:</span>
|
||
</span><span id="line-28"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
<p>This configuration allows clients to:</p>
|
||
<ol class="arabic simple">
|
||
<li><p><strong>Use direct model selection</strong>: <code class="docutils literal notranslate"><span class="pre">model="fast-model"</span></code></p></li>
|
||
<li><p><strong>Let the router decide</strong>: No model specified, router analyzes content</p></li>
|
||
</ol>
|
||
</section>
|
||
<section id="example-use-cases">
|
||
<h2>Example Use Cases<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#example-use-cases" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#example-use-cases'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p>Here are common scenarios where Arch-Router excels:</p>
|
||
<ul class="simple">
|
||
<li><p><strong>Coding Tasks</strong>: Distinguish between code generation requests (“write a Python function”), debugging needs (“fix this error”), and code optimization (“make this faster”), routing each to appropriately specialized models.</p></li>
|
||
<li><p><strong>Content Processing Workflows</strong>: Classify requests as summarization (“summarize this document”), translation (“translate to Spanish”), or analysis (“what are the key themes”), enabling targeted model selection.</p></li>
|
||
<li><p><strong>Multi-Domain Applications</strong>: Accurately identify whether requests fall into legal, healthcare, technical, or general domains, even when the subject matter isn’t explicitly stated in the prompt.</p></li>
|
||
<li><p><strong>Conversational Routing</strong>: Track conversation context to identify when topics shift between domains or when the type of assistance needed changes mid-conversation.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="best-practicesm">
|
||
<h2>Best Practices<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#best-practicesm" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#best-practicesm'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<ul class="simple">
|
||
<li><p><strong>💡 Consistent Naming:</strong> Route names should align with their descriptions.</p>
|
||
<ul>
|
||
<li><p>❌ Bad:
|
||
<code class="docutils literal notranslate">
|
||
<span class="pre">{"name":</span> <span class="pre">"math",</span> <span class="pre">"description":</span> <span class="pre">"handle</span> <span class="pre">solving</span> <span class="pre">quadratic</span> <span class="pre">equations"}</span>
|
||
</code></p></li>
|
||
<li><p>✅ Good:
|
||
<code class="docutils literal notranslate">
|
||
<span class="pre">{"name":</span> <span class="pre">"quadratic_equation",</span> <span class="pre">"description":</span> <span class="pre">"solving</span> <span class="pre">quadratic</span> <span class="pre">equations"}</span>
|
||
</code></p></li>
|
||
</ul>
|
||
</li>
|
||
<li><p><strong>💡 Clear Usage Description:</strong> Make your route names and descriptions specific and unambiguous, and minimize overlap between routes. The Router performs better when it can clearly distinguish between different types of requests.</p>
|
||
<ul>
|
||
<li><p>❌ Bad:
|
||
<code class="docutils literal notranslate">
|
||
<span class="pre">{"name":</span> <span class="pre">"math",</span> <span class="pre">"description":</span> <span class="pre">"anything</span> <span class="pre">closely</span> <span class="pre">related</span> <span class="pre">to</span> <span class="pre">mathematics"}</span>
|
||
</code></p></li>
|
||
<li><p>✅ Good:
|
||
<code class="docutils literal notranslate">
|
||
<span class="pre">{"name":</span> <span class="pre">"math",</span> <span class="pre">"description":</span> <span class="pre">"solving,</span> <span class="pre">explaining</span> <span class="pre">math</span> <span class="pre">problems,</span> <span class="pre">concepts"}</span>
|
||
</code></p></li>
|
||
</ul>
|
||
</li>
|
||
<li><p><strong>💡 Noun Descriptors:</strong> Preference-based routers perform better with noun-centric descriptors, as they offer more stable and semantically rich signals for matching.</p></li>
|
||
<li><p><strong>💡 Domain Inclusion:</strong> For the best user experience, you should always include a domain route. This helps the router fall back to the domain when the action is not confidently inferred.</p></li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||
<div class="mr-auto">
|
||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="function_calling.html">
|
||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||
<polyline points="15 18 9 12 15 6"></polyline>
|
||
</svg>
|
||
Function Calling
|
||
</a>
|
||
</div>
|
||
<div class="ml-auto">
|
||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="observability/observability.html">
|
||
Observability
|
||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||
<polyline points="9 18 15 12 9 6"></polyline>
|
||
</svg>
|
||
</a>
|
||
</div>
|
||
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
|
||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||
<ul>
|
||
<li><a :data-current="activeSection === '#routing-methods'" class="reference internal" href="#routing-methods">Routing Methods</a><ul>
|
||
<li><a :data-current="activeSection === '#model-based-routing'" class="reference internal" href="#model-based-routing">Model-based Routing</a></li>
|
||
<li><a :data-current="activeSection === '#alias-based-routing'" class="reference internal" href="#alias-based-routing">Alias-based Routing</a></li>
|
||
<li><a :data-current="activeSection === '#preference-aligned-routing-arch-router'" class="reference internal" href="#preference-aligned-routing-arch-router">Preference-aligned Routing (Arch-Router)</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a :data-current="activeSection === '#model-based-routing-workflow'" class="reference internal" href="#model-based-routing-workflow">Model-based Routing Workflow</a></li>
|
||
<li><a :data-current="activeSection === '#alias-based-routing-workflow'" class="reference internal" href="#alias-based-routing-workflow">Alias-based Routing Workflow</a></li>
|
||
<li><a :data-current="activeSection === '#preference-aligned-routing-workflow-arch-router'" class="reference internal" href="#preference-aligned-routing-workflow-arch-router">Preference-aligned Routing Workflow (Arch-Router)</a></li>
|
||
<li><a :data-current="activeSection === '#id1'" class="reference internal" href="#id1">Arch-Router</a></li>
|
||
<li><a :data-current="activeSection === '#implementing-routing'" class="reference internal" href="#implementing-routing">Implementing Routing</a></li>
|
||
<li><a :data-current="activeSection === '#combining-routing-methods'" class="reference internal" href="#combining-routing-methods">Combining Routing Methods</a></li>
|
||
<li><a :data-current="activeSection === '#example-use-cases'" class="reference internal" href="#example-use-cases">Example Use Cases</a></li>
|
||
<li><a :data-current="activeSection === '#best-practicesm'" class="reference internal" href="#best-practicesm">Best Practices</a></li>
|
||
</ul>
|
||
</div>
|
||
</aside>
|
||
</main>
|
||
</div>
|
||
</div><footer class="py-6 border-t border-border md:py-0">
|
||
<div class="container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row">
|
||
<div class="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
|
||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2025, Katanemo Labs, Inc. Last updated: Sep 30, 2025. </p>
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
</div>
|
||
<script src="../_static/documentation_options.js?v=4ccc6128"></script>
|
||
<script src="../_static/doctools.js?v=9bcbadda"></script>
|
||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
<script defer="defer" src="../_static/theme.js?v=073f68d9"></script>
|
||
<script src="../_static/design-tabs.js?v=f930bc37"></script>
|
||
</body>
|
||
</html> |