mirror of
https://github.com/katanemo/plano.git
synced 2026-05-24 14:05:14 +02:00
deploy: fbe82351c0
This commit is contained in:
parent
f2b60db65d
commit
abbd796901
34 changed files with 2623 additions and 371 deletions
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -1,283 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' && window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val => localStorage.setItem('darkMode', val))">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
|
||||
<meta charset="utf-8"/>
|
||||
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
|
||||
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport"/>
|
||||
<title>LLM Provider | Arch Docs v0.3.12</title>
|
||||
<meta content="LLM Provider | Arch Docs v0.3.12" property="og:title"/>
|
||||
<meta content="LLM Provider | Arch Docs v0.3.12" name="twitter:title"/>
|
||||
<link href="../_static/pygments.css?v=466e7b45" rel="stylesheet" type="text/css"/>
|
||||
<link href="../_static/theme.css?v=42baaae4" rel="stylesheet" type="text/css"/>
|
||||
<link href="../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||||
<link href="../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
|
||||
<link href="../_static/awesome-sphinx-design.css?v=15e0fffa" rel="stylesheet" type="text/css"/>
|
||||
<link href="./docs/concepts/llm_provider.html" rel="canonical"/>
|
||||
<link href="../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../search.html" rel="search" title="Search"/>
|
||||
<link href="prompt_target.html" rel="next" title="Prompt Target"/>
|
||||
<link href="tech_overview/error_target.html" rel="prev" title="Error Target"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
let mode;
|
||||
if (userPreference === 'dark' || window.matchMedia('(prefers-color-scheme: dark)').matches) {
|
||||
mode = 'dark';
|
||||
document.documentElement.classList.add('dark');
|
||||
} else {
|
||||
mode = 'light';
|
||||
}
|
||||
if (!userPreference) {localStorage.setItem('darkMode', mode)}
|
||||
</script>
|
||||
</head>
|
||||
<body :class="{ 'overflow-hidden': showSidebar }" class="min-h-screen font-sans antialiased bg-background text-foreground" x-data="{ showSidebar: false, showScrollTop: false }">
|
||||
<div @click.self="showSidebar = false" class="fixed inset-0 z-50 overflow-hidden bg-background/80 backdrop-blur-sm md:hidden" x-cloak="" x-show="showSidebar"></div><div class="relative flex flex-col min-h-screen" id="page"><a class="absolute top-0 left-0 z-[100] block bg-background p-4 text-xl transition -translate-x-full opacity-0 focus:translate-x-0 focus:opacity-100" href="#content">
|
||||
Skip to content
|
||||
</a><header class="sticky top-0 z-40 w-full border-b shadow-sm border-border supports-backdrop-blur:bg-background/60 bg-background/95 backdrop-blur"><div class="container flex items-center h-14">
|
||||
<div class="hidden mr-4 md:flex">
|
||||
<a class="flex items-center mr-6" href="../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
|
||||
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
|
||||
</svg>
|
||||
<span class="sr-only">Toggle navigation menu</span>
|
||||
</button>
|
||||
<div class="flex items-center justify-between flex-1 space-x-2 sm:space-x-4 md:justify-end">
|
||||
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../search.html" class="relative flex items-center group" id="searchbox" method="get">
|
||||
<input aria-label="Search the docs" class="inline-flex items-center font-medium transition-colors bg-transparent focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 ring-offset-background border border-input hover:bg-accent focus:bg-accent hover:text-accent-foreground focus:text-accent-foreground hover:placeholder-accent-foreground py-2 px-4 relative h-9 w-full justify-start rounded-[0.5rem] text-sm text-muted-foreground sm:pr-12 md:w-40 lg:w-64" id="search-input" name="q" placeholder="Search ..." type="search" x-ref="search"/>
|
||||
<kbd class="pointer-events-none absolute right-1.5 top-2 hidden h-5 select-none text-muted-foreground items-center gap-1 rounded border border-border bg-muted px-1.5 font-mono text-[10px] font-medium opacity-100 sm:flex group-hover:bg-accent group-hover:text-accent-foreground">
|
||||
<span class="text-xs">⌘</span>
|
||||
K
|
||||
</kbd>
|
||||
</form>
|
||||
</div>
|
||||
<nav class="flex items-center space-x-1">
|
||||
<a href="https://github.com/katanemo/arch" rel="noopener nofollow" title="Visit repository on GitHub">
|
||||
<div class="inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md disabled:opacity-50 disabled:pointer-events-none hover:bg-accent hover:text-accent-foreground h-9 w-9">
|
||||
<svg fill="currentColor" height="26px" style="margin-top:-2px;display:inline" viewbox="0 0 45 44" xmlns="http://www.w3.org/2000/svg"><path clip-rule="evenodd" d="M22.477.927C10.485.927.76 10.65.76 22.647c0 9.596 6.223 17.736 14.853 20.608 1.087.2 1.483-.47 1.483-1.047 0-.516-.019-1.881-.03-3.693-6.04 1.312-7.315-2.912-7.315-2.912-.988-2.51-2.412-3.178-2.412-3.178-1.972-1.346.149-1.32.149-1.32 2.18.154 3.327 2.24 3.327 2.24 1.937 3.318 5.084 2.36 6.321 1.803.197-1.403.759-2.36 1.379-2.903-4.823-.548-9.894-2.412-9.894-10.734 0-2.37.847-4.31 2.236-5.828-.224-.55-.969-2.759.214-5.748 0 0 1.822-.584 5.972 2.226 1.732-.482 3.59-.722 5.437-.732 1.845.01 3.703.25 5.437.732 4.147-2.81 5.967-2.226 5.967-2.226 1.185 2.99.44 5.198.217 5.748 1.392 1.517 2.232 3.457 2.232 5.828 0 8.344-5.078 10.18-9.916 10.717.779.67 1.474 1.996 1.474 4.021 0 2.904-.027 5.247-.027 5.96 0 .58.392 1.256 1.493 1.044C37.981 40.375 44.2 32.24 44.2 22.647c0-11.996-9.726-21.72-21.722-21.72" fill="currentColor" fill-rule="evenodd"></path></svg>
|
||||
</div>
|
||||
</a>
|
||||
<button @click="darkMode = darkMode === 'light' ? 'dark' : 'light'" aria-label="Color theme switcher" class="relative inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md hover:bg-accent hover:text-accent-foreground h-9 w-9" type="button">
|
||||
<svg class="absolute transition-all scale-100 rotate-0 dark:-rotate-90 dark:scale-0" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 685q45.456 0 77.228-31.772Q589 621.456 589 576q0-45.456-31.772-77.228Q525.456 467 480 467q-45.456 0-77.228 31.772Q371 530.544 371 576q0 45.456 31.772 77.228Q434.544 685 480 685Zm0 91q-83 0-141.5-58.5T280 576q0-83 58.5-141.5T480 376q83 0 141.5 58.5T680 576q0 83-58.5 141.5T480 776ZM80 621.5q-19.152 0-32.326-13.174T34.5 576q0-19.152 13.174-32.326T80 530.5h80q19.152 0 32.326 13.174T205.5 576q0 19.152-13.174 32.326T160 621.5H80Zm720 0q-19.152 0-32.326-13.174T754.5 576q0-19.152 13.174-32.326T800 530.5h80q19.152 0 32.326 13.174T925.5 576q0 19.152-13.174 32.326T880 621.5h-80Zm-320-320q-19.152 0-32.326-13.174T434.5 256v-80q0-19.152 13.174-32.326T480 130.5q19.152 0 32.326 13.174T525.5 176v80q0 19.152-13.174 32.326T480 301.5Zm0 720q-19.152 0-32.326-13.17Q434.5 995.152 434.5 976v-80q0-19.152 13.174-32.326T480 850.5q19.152 0 32.326 13.174T525.5 896v80q0 19.152-13.174 32.33-13.174 13.17-32.326 13.17ZM222.174 382.065l-43-42Q165.5 327.391 166 308.239t13.174-33.065q13.435-13.674 32.587-13.674t32.065 13.674l42.239 43q12.674 13.435 12.555 31.706-.12 18.272-12.555 31.946-12.674 13.674-31.445 13.413-18.772-.261-32.446-13.174Zm494 494.761-42.239-43q-12.674-13.435-12.674-32.087t12.674-31.565Q686.609 756.5 705.38 757q18.772.5 32.446 13.174l43 41.761Q794.5 824.609 794 843.761t-13.174 33.065Q767.391 890.5 748.239 890.5t-32.065-13.674Zm-42-494.761Q660.5 369.391 661 350.62q.5-18.772 13.174-32.446l41.761-43Q728.609 261.5 747.761 262t33.065 13.174q13.674 13.435 13.674 32.587t-13.674 32.065l-43 42.239q-13.435 12.674-31.706 12.555-18.272-.12-31.946-12.555Zm-495 494.761Q165.5 863.391 165.5 844.239t13.674-32.065l43-42.239q13.435-12.674 32.087-12.674t31.565 12.674Q299.5 782.609 299 801.38q-.5 18.772-13.174 32.446l-41.761 43Q231.391 890.5 212.239 890t-33.065-13.174ZM480 576Z"></path>
|
||||
</svg>
|
||||
<svg class="absolute transition-all scale-0 rotate-90 dark:rotate-0 dark:scale-100" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 936q-151 0-255.5-104.5T120 576q0-138 90-239.5T440 218q25-3 39 18t-1 44q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q21-14 43-1.5t19 39.5q-14 138-117.5 229T480 936Zm0-80q88 0 158-48.5T740 681q-20 5-40 8t-40 3q-123 0-209.5-86.5T364 396q0-20 3-40t8-40q-78 32-126.5 102T200 576q0 116 82 198t198 82Zm-10-270Z"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
<div class="flex-1"><div class="container flex-1 items-start md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full shrink-0 overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
|
||||
<a class="!justify-start text-sm md:!hidden bg-background" href="../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a>
|
||||
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0 h-[calc(100vh-8rem)] md:h-auto">
|
||||
<div class="overflow-y-auto h-full w-full relative pr-6">
|
||||
|
||||
<script async="" src="https://www.googletagmanager.com/gtag/js?id=G-K2LXXSX6HB"></script>
|
||||
<script>
|
||||
window.dataLayer = window.dataLayer || [];
|
||||
function gtag(){dataLayer.push(arguments);}
|
||||
gtag('js', new Date());
|
||||
|
||||
gtag('config', 'G-K2LXXSX6HB');
|
||||
</script>
|
||||
<nav class="table w-full min-w-full my-6 lg:my-8">
|
||||
<p class="caption" role="heading"><span class="caption-text">Get Started</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../get_started/overview.html">Overview</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../get_started/intro_to_arch.html">Intro to Arch</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../get_started/quickstart.html">Quickstart</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../get_started/quickstart.html#next-steps">Next Steps</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Concepts</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="tech_overview/tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="#">LLM Provider</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../guides/prompt_guard.html">Prompt Guard</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../guides/agent_routing.html">Agent Routing and Hand Off</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../guides/function_calling.html">Function Calling</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../guides/llm_router.html">LLM Routing</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../guides/observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../guides/observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../guides/observability/monitoring.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../guides/observability/access_logging.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Build with Arch</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/agent.html">Agentic Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/rag.html">RAG Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/multi_turn.html">Multi-Turn</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
<button @click="showSidebar = false" class="absolute md:hidden right-4 top-4 rounded-sm opacity-70 transition-opacity hover:opacity-100" type="button">
|
||||
<svg class="h-4 w-4" fill="currentColor" height="24" stroke="none" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 632 284 828q-11 11-28 11t-28-11q-11-11-11-28t11-28l196-196-196-196q-11-11-11-28t11-28q11-11 28-11t28 11l196 196 196-196q11-11 28-11t28 11q11 11 11 28t-11 28L536 576l196 196q11 11 11 28t-11 28q-11 11-28 11t-28-11L480 632Z"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</aside>
|
||||
<main class="relative py-6 lg:gap-10 lg:py-8 xl:grid xl:grid-cols-[1fr_300px]">
|
||||
<div class="w-full min-w-0 mx-auto">
|
||||
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
|
||||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../index.html">
|
||||
<span class="hidden md:inline">Arch Docs v0.3.12</span>
|
||||
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
|
||||
</svg>
|
||||
</a>
|
||||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">LLM Provider</span>
|
||||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="llm-provider">
|
||||
<span id="id1"></span><h1>LLM Provider<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#llm-provider"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p><strong>LLM provider</strong> is a top-level primitive in Arch, helping developers centrally define, secure, observe,
|
||||
and manage the usage of their LLMs. Arch builds on Envoy’s reliable <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/upstream/cluster_manager" rel="nofollow noopener">cluster subsystem<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>
|
||||
to manage egress traffic to LLMs, which includes intelligent routing, retry and fail-over mechanisms,
|
||||
ensuring high availability and fault tolerance. This abstraction also enables developers to seamlessly
|
||||
switch between LLM providers or upgrade LLM versions, simplifying the integration and scaling of LLMs
|
||||
across applications.</p>
|
||||
<p>Below is an example of how you can configure <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> with an instance of an Arch gateway.</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1.0</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listeners</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">ingress_traffic</span><span class="p">:</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="nt">timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">30s</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span>
|
||||
</span><span id="line-10"><mark><span class="linenos">10</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</mark></span><span id="line-11"><mark><span class="linenos">11</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</mark></span><span id="line-12"><mark><span class="linenos">12</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</mark></span><span id="line-13"><mark><span class="linenos">13</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||||
</mark></span><span id="line-14"><mark><span class="linenos">14</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</mark></span><span id="line-15"><mark><span class="linenos">15</span>
|
||||
</mark></span><span id="line-16"><mark><span class="linenos">16</span><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</mark></span><span id="line-17"><span class="linenos">17</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>When you start Arch, it creates a listener port for egress traffic based on the presence of <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code>
|
||||
configuration section in the <code class="docutils literal notranslate"><span class="pre">arch_config.yml</span></code> file. Arch binds itself to a local address such as
|
||||
<code class="docutils literal notranslate"><span class="pre">127.0.0.1:12000</span></code>.</p>
|
||||
</div>
|
||||
<p>Arch also offers vendor-agnostic SDKs and libraries to make LLM calls to API-based LLM providers (like OpenAI,
|
||||
Anthropic, Mistral, Cohere, etc.) and supports calls to OSS LLMs that are hosted on your infrastructure. Arch
|
||||
abstracts the complexities of integrating with different LLM providers, providing a unified interface for making
|
||||
calls, handling retries, managing rate limits, and ensuring seamless integration with cloud-based and on-premise
|
||||
LLMs. Simply configure the details of the LLMs your application will use, and Arch offers a unified interface to
|
||||
make outbound LLM calls.</p>
|
||||
<section id="adding-custom-llm-provider">
|
||||
<h2>Adding custom LLM Provider<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#adding-custom-llm-provider" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#adding-custom-llm-provider'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>We support any OpenAI-compliant LLM, for example Mistral, OpenAI, Ollama, etc. We also offer first-class support for OpenAI, Anthropic, DeepSeek, Mistral, Groq, and Ollama-based models.
|
||||
You can easily configure an LLM that communicates over the OpenAI API interface by following the guide below.</p>
|
||||
<p>For example, the following code block shows how to add an Ollama-supported LLM in the <code class="docutils literal notranslate"><span class="pre">arch_config.yaml</span></code> file.</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">some_custom_llm_provider/llama3.2</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">provider_interface</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:11434</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p>The following code block shows how to add a Mistral LLM provider in the <code class="docutils literal notranslate"><span class="pre">arch_config.yaml</span></code> file.</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral/ministral-3b-latest</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="example-using-the-openai-python-sdk">
|
||||
<h2>Example: Using the OpenAI Python SDK<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#example-using-the-openai-python-sdk" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#example-using-the-openai-python-sdk'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">from</span><span class="w"> </span><span class="nn">openai</span><span class="w"> </span><span class="kn">import</span> <span class="n">OpenAI</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="c1"># Initialize the Arch client</span>
|
||||
</span><span id="line-4"><span class="n">client</span> <span class="o">=</span> <span class="n">OpenAI</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:2000/"</span><span class="p">)</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="c1"># Define your model and messages</span>
|
||||
</span><span id="line-7"><span class="n">model</span> <span class="o">=</span> <span class="s2">"llama3.2"</span>
|
||||
</span><span id="line-8"><span class="n">messages</span> <span class="o">=</span> <span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"What is the capital of France?"</span><span class="p">}]</span>
|
||||
</span><span id="line-9">
|
||||
</span><span id="line-10"><span class="c1"># Send the messages to the LLM through Arch</span>
|
||||
</span><span id="line-11"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">model</span><span class="o">=</span><span class="n">model</span><span class="p">,</span> <span class="n">messages</span><span class="o">=</span><span class="n">messages</span><span class="p">)</span>
|
||||
</span><span id="line-12">
|
||||
</span><span id="line-13"><span class="c1"># Print the response</span>
|
||||
</span><span id="line-14"><span class="nb">print</span><span class="p">(</span><span class="s2">"LLM Response:"</span><span class="p">,</span> <span class="n">response</span><span class="o">.</span><span class="n">choices</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">message</span><span class="o">.</span><span class="n">content</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="tech_overview/error_target.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Error Target
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="prompt_target.html">
|
||||
Prompt Target
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
|
||||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
<li><a :data-current="activeSection === '#adding-custom-llm-provider'" class="reference internal" href="#adding-custom-llm-provider">Adding custom LLM Provider</a></li>
|
||||
<li><a :data-current="activeSection === '#example-using-the-openai-python-sdk'" class="reference internal" href="#example-using-the-openai-python-sdk">Example: Using the OpenAI Python SDK</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</aside>
|
||||
</main>
|
||||
</div>
|
||||
</div><footer class="py-6 border-t border-border md:py-0">
|
||||
<div class="container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row">
|
||||
<div class="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
|
||||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2025, Katanemo Labs, Inc. Last updated: Sep 19, 2025. </p>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
<script src="../_static/documentation_options.js?v=a9d256b5"></script>
|
||||
<script src="../_static/doctools.js?v=9bcbadda"></script>
|
||||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script defer="defer" src="../_static/theme.js?v=073f68d9"></script>
|
||||
<script src="../_static/design-tabs.js?v=f930bc37"></script>
|
||||
</body>
|
||||
</html>
|
||||
597
concepts/llm_providers/client_libraries.html
Executable file
597
concepts/llm_providers/client_libraries.html
Executable file
|
|
@ -0,0 +1,597 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' && window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val => localStorage.setItem('darkMode', val))">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
|
||||
<meta charset="utf-8"/>
|
||||
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
|
||||
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport"/>
|
||||
<title>Client Libraries | Arch Docs v0.3.12</title>
|
||||
<meta content="Client Libraries | Arch Docs v0.3.12" property="og:title"/>
|
||||
<meta content="Client Libraries | Arch Docs v0.3.12" name="twitter:title"/>
|
||||
<link href="../../_static/pygments.css?v=466e7b45" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/theme.css?v=42baaae4" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/awesome-sphinx-design.css?v=15e0fffa" rel="stylesheet" type="text/css"/>
|
||||
<link href="./docs/concepts/llm_providers/client_libraries.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="model_aliases.html" rel="next" title="Model Aliases"/>
|
||||
<link href="supported_providers.html" rel="prev" title="Supported Providers & Configuration"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
let mode;
|
||||
if (userPreference === 'dark' || window.matchMedia('(prefers-color-scheme: dark)').matches) {
|
||||
mode = 'dark';
|
||||
document.documentElement.classList.add('dark');
|
||||
} else {
|
||||
mode = 'light';
|
||||
}
|
||||
if (!userPreference) {localStorage.setItem('darkMode', mode)}
|
||||
</script>
|
||||
</head>
|
||||
<body :class="{ 'overflow-hidden': showSidebar }" class="min-h-screen font-sans antialiased bg-background text-foreground" x-data="{ showSidebar: false, showScrollTop: false }">
|
||||
<div @click.self="showSidebar = false" class="fixed inset-0 z-50 overflow-hidden bg-background/80 backdrop-blur-sm md:hidden" x-cloak="" x-show="showSidebar"></div><div class="relative flex flex-col min-h-screen" id="page"><a class="absolute top-0 left-0 z-[100] block bg-background p-4 text-xl transition -translate-x-full opacity-0 focus:translate-x-0 focus:opacity-100" href="#content">
|
||||
Skip to content
|
||||
</a><header class="sticky top-0 z-40 w-full border-b shadow-sm border-border supports-backdrop-blur:bg-background/60 bg-background/95 backdrop-blur"><div class="container flex items-center h-14">
|
||||
<div class="hidden mr-4 md:flex">
|
||||
<a class="flex items-center mr-6" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
|
||||
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
|
||||
</svg>
|
||||
<span class="sr-only">Toggle navigation menu</span>
|
||||
</button>
|
||||
<div class="flex items-center justify-between flex-1 space-x-2 sm:space-x-4 md:justify-end">
|
||||
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../../search.html" class="relative flex items-center group" id="searchbox" method="get">
|
||||
<input aria-label="Search the docs" class="inline-flex items-center font-medium transition-colors bg-transparent focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 ring-offset-background border border-input hover:bg-accent focus:bg-accent hover:text-accent-foreground focus:text-accent-foreground hover:placeholder-accent-foreground py-2 px-4 relative h-9 w-full justify-start rounded-[0.5rem] text-sm text-muted-foreground sm:pr-12 md:w-40 lg:w-64" id="search-input" name="q" placeholder="Search ..." type="search" x-ref="search"/>
|
||||
<kbd class="pointer-events-none absolute right-1.5 top-2 hidden h-5 select-none text-muted-foreground items-center gap-1 rounded border border-border bg-muted px-1.5 font-mono text-[10px] font-medium opacity-100 sm:flex group-hover:bg-accent group-hover:text-accent-foreground">
|
||||
<span class="text-xs">⌘</span>
|
||||
K
|
||||
</kbd>
|
||||
</form>
|
||||
</div>
|
||||
<nav class="flex items-center space-x-1">
|
||||
<a href="https://github.com/katanemo/arch" rel="noopener nofollow" title="Visit repository on GitHub">
|
||||
<div class="inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md disabled:opacity-50 disabled:pointer-events-none hover:bg-accent hover:text-accent-foreground h-9 w-9">
|
||||
<svg fill="currentColor" height="26px" style="margin-top:-2px;display:inline" viewbox="0 0 45 44" xmlns="http://www.w3.org/2000/svg"><path clip-rule="evenodd" d="M22.477.927C10.485.927.76 10.65.76 22.647c0 9.596 6.223 17.736 14.853 20.608 1.087.2 1.483-.47 1.483-1.047 0-.516-.019-1.881-.03-3.693-6.04 1.312-7.315-2.912-7.315-2.912-.988-2.51-2.412-3.178-2.412-3.178-1.972-1.346.149-1.32.149-1.32 2.18.154 3.327 2.24 3.327 2.24 1.937 3.318 5.084 2.36 6.321 1.803.197-1.403.759-2.36 1.379-2.903-4.823-.548-9.894-2.412-9.894-10.734 0-2.37.847-4.31 2.236-5.828-.224-.55-.969-2.759.214-5.748 0 0 1.822-.584 5.972 2.226 1.732-.482 3.59-.722 5.437-.732 1.845.01 3.703.25 5.437.732 4.147-2.81 5.967-2.226 5.967-2.226 1.185 2.99.44 5.198.217 5.748 1.392 1.517 2.232 3.457 2.232 5.828 0 8.344-5.078 10.18-9.916 10.717.779.67 1.474 1.996 1.474 4.021 0 2.904-.027 5.247-.027 5.96 0 .58.392 1.256 1.493 1.044C37.981 40.375 44.2 32.24 44.2 22.647c0-11.996-9.726-21.72-21.722-21.72" fill="currentColor" fill-rule="evenodd"></path></svg>
|
||||
</div>
|
||||
</a>
|
||||
<button @click="darkMode = darkMode === 'light' ? 'dark' : 'light'" aria-label="Color theme switcher" class="relative inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md hover:bg-accent hover:text-accent-foreground h-9 w-9" type="button">
|
||||
<svg class="absolute transition-all scale-100 rotate-0 dark:-rotate-90 dark:scale-0" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 685q45.456 0 77.228-31.772Q589 621.456 589 576q0-45.456-31.772-77.228Q525.456 467 480 467q-45.456 0-77.228 31.772Q371 530.544 371 576q0 45.456 31.772 77.228Q434.544 685 480 685Zm0 91q-83 0-141.5-58.5T280 576q0-83 58.5-141.5T480 376q83 0 141.5 58.5T680 576q0 83-58.5 141.5T480 776ZM80 621.5q-19.152 0-32.326-13.174T34.5 576q0-19.152 13.174-32.326T80 530.5h80q19.152 0 32.326 13.174T205.5 576q0 19.152-13.174 32.326T160 621.5H80Zm720 0q-19.152 0-32.326-13.174T754.5 576q0-19.152 13.174-32.326T800 530.5h80q19.152 0 32.326 13.174T925.5 576q0 19.152-13.174 32.326T880 621.5h-80Zm-320-320q-19.152 0-32.326-13.174T434.5 256v-80q0-19.152 13.174-32.326T480 130.5q19.152 0 32.326 13.174T525.5 176v80q0 19.152-13.174 32.326T480 301.5Zm0 720q-19.152 0-32.326-13.17Q434.5 995.152 434.5 976v-80q0-19.152 13.174-32.326T480 850.5q19.152 0 32.326 13.174T525.5 896v80q0 19.152-13.174 32.33-13.174 13.17-32.326 13.17ZM222.174 382.065l-43-42Q165.5 327.391 166 308.239t13.174-33.065q13.435-13.674 32.587-13.674t32.065 13.674l42.239 43q12.674 13.435 12.555 31.706-.12 18.272-12.555 31.946-12.674 13.674-31.445 13.413-18.772-.261-32.446-13.174Zm494 494.761-42.239-43q-12.674-13.435-12.674-32.087t12.674-31.565Q686.609 756.5 705.38 757q18.772.5 32.446 13.174l43 41.761Q794.5 824.609 794 843.761t-13.174 33.065Q767.391 890.5 748.239 890.5t-32.065-13.674Zm-42-494.761Q660.5 369.391 661 350.62q.5-18.772 13.174-32.446l41.761-43Q728.609 261.5 747.761 262t33.065 13.174q13.674 13.435 13.674 32.587t-13.674 32.065l-43 42.239q-13.435 12.674-31.706 12.555-18.272-.12-31.946-12.555Zm-495 494.761Q165.5 863.391 165.5 844.239t13.674-32.065l43-42.239q13.435-12.674 32.087-12.674t31.565 12.674Q299.5 782.609 299 801.38q-.5 18.772-13.174 32.446l-41.761 43Q231.391 890.5 212.239 890t-33.065-13.174ZM480 576Z"></path>
|
||||
</svg>
|
||||
<svg class="absolute transition-all scale-0 rotate-90 dark:rotate-0 dark:scale-100" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 936q-151 0-255.5-104.5T120 576q0-138 90-239.5T440 218q25-3 39 18t-1 44q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q21-14 43-1.5t19 39.5q-14 138-117.5 229T480 936Zm0-80q88 0 158-48.5T740 681q-20 5-40 8t-40 3q-123 0-209.5-86.5T364 396q0-20 3-40t8-40q-78 32-126.5 102T200 576q0 116 82 198t198 82Zm-10-270Z"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
<div class="flex-1"><div class="container flex-1 items-start md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full shrink-0 overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
|
||||
<a class="!justify-start text-sm md:!hidden bg-background" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a>
|
||||
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0 h-[calc(100vh-8rem)] md:h-auto">
|
||||
<div class="overflow-y-auto h-full w-full relative pr-6">
|
||||
|
||||
<script async="" src="https://www.googletagmanager.com/gtag/js?id=G-K2LXXSX6HB"></script>
|
||||
<script>
|
||||
window.dataLayer = window.dataLayer || [];
|
||||
function gtag(){dataLayer.push(arguments);}
|
||||
gtag('js', new Date());
|
||||
|
||||
gtag('config', 'G-K2LXXSX6HB');
|
||||
</script>
|
||||
<nav class="table w-full min-w-full my-6 lg:my-8">
|
||||
<p class="caption" role="heading"><span class="caption-text">Get Started</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/overview.html">Overview</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/intro_to_arch.html">Intro to Arch</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html">Quickstart</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html#next-steps">Next Steps</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Concepts</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../tech_overview/tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1 current" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul class="current" x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/prompt_guard.html">Prompt Guard</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/agent_routing.html">Agent Routing and Hand Off</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/function_calling.html">Function Calling</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/llm_router.html">LLM Routing</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../guides/observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/monitoring.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/access_logging.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Build with Arch</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/agent.html">Agentic Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/rag.html">RAG Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/multi_turn.html">Multi-Turn</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
<button @click="showSidebar = false" class="absolute md:hidden right-4 top-4 rounded-sm opacity-70 transition-opacity hover:opacity-100" type="button">
|
||||
<svg class="h-4 w-4" fill="currentColor" height="24" stroke="none" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 632 284 828q-11 11-28 11t-28-11q-11-11-11-28t11-28l196-196-196-196q-11-11-11-28t11-28q11-11 28-11t28 11l196 196 196-196q11-11 28-11t28 11q11 11 11 28t-11 28L536 576l196 196q11 11 11 28t-11 28q-11 11-28 11t-28-11L480 632Z"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</aside>
|
||||
<main class="relative py-6 lg:gap-10 lg:py-8 xl:grid xl:grid-cols-[1fr_300px]">
|
||||
<div class="w-full min-w-0 mx-auto">
|
||||
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
|
||||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../../index.html">
|
||||
<span class="hidden md:inline">Arch Docs v0.3.12</span>
|
||||
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
|
||||
</svg>
|
||||
</a>
|
||||
<div class="mr-1">/</div><a class="hover:text-foreground overflow-hidden text-ellipsis whitespace-nowrap" href="llm_providers.html">LLM Providers</a>
|
||||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Client Libraries</span>
|
||||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="client-libraries">
|
||||
<span id="id1"></span><h1>Client Libraries<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#client-libraries"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>Arch provides a unified interface that works seamlessly with multiple client libraries and tools. You can use your preferred client library without changing your existing code - just point it to Arch’s gateway endpoints.</p>
|
||||
<section id="supported-clients">
|
||||
<h2>Supported Clients<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#supported-clients" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#supported-clients'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<ul class="simple">
|
||||
<li><p><strong>OpenAI SDK</strong> - Full compatibility with OpenAI’s official client</p></li>
|
||||
<li><p><strong>Anthropic SDK</strong> - Native support for Anthropic’s client library</p></li>
|
||||
<li><p><strong>cURL</strong> - Direct HTTP requests for any programming language</p></li>
|
||||
<li><p><strong>Custom HTTP Clients</strong> - Any HTTP client that supports REST APIs</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="gateway-endpoints">
|
||||
<h2>Gateway Endpoints<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#gateway-endpoints" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#gateway-endpoints'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>Arch exposes two main endpoints:</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 40.0%"/>
|
||||
<col style="width: 60.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Endpoint</p></th>
|
||||
<th class="head"><p>Purpose</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">http://127.0.0.1:12000/v1/chat/completions</span></code></p></td>
|
||||
<td><p>OpenAI-compatible chat completions (LLM Gateway)</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">http://127.0.0.1:12000/v1/messages</span></code></p></td>
|
||||
<td><p>Anthropic-compatible messages (LLM Gateway)</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
<section id="openai-python-sdk">
|
||||
<h2>OpenAI (Python) SDK<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#openai-python-sdk" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#openai-python-sdk'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>The OpenAI SDK works with any provider through Arch’s OpenAI-compatible endpoint.</p>
|
||||
<p><strong>Installation:</strong></p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><code><span id="line-1">pip<span class="w"> </span>install<span class="w"> </span>openai
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Basic Usage:</strong></p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">from</span><span class="w"> </span><span class="nn">openai</span><span class="w"> </span><span class="kn">import</span> <span class="n">OpenAI</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="c1"># Point to Arch's LLM Gateway</span>
|
||||
</span><span id="line-4"><span class="n">client</span> <span class="o">=</span> <span class="n">OpenAI</span><span class="p">(</span>
|
||||
</span><span id="line-5"> <span class="n">api_key</span><span class="o">=</span><span class="s2">"test-key"</span><span class="p">,</span> <span class="c1"># Can be any value for local testing</span>
|
||||
</span><span id="line-6"> <span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000/v1"</span>
|
||||
</span><span id="line-7"><span class="p">)</span>
|
||||
</span><span id="line-8">
|
||||
</span><span id="line-9"><span class="c1"># Use any model configured in your arch_config.yaml</span>
|
||||
</span><span id="line-10"><span class="n">completion</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-11"> <span class="n">model</span><span class="o">=</span><span class="s2">"gpt-4o-mini"</span><span class="p">,</span> <span class="c1"># Or use a model alias like "fast-model"</span>
|
||||
</span><span id="line-12"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-13"> <span class="n">messages</span><span class="o">=</span><span class="p">[</span>
|
||||
</span><span id="line-14"> <span class="p">{</span>
|
||||
</span><span id="line-15"> <span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span>
|
||||
</span><span id="line-16"> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Hello, how are you?"</span>
|
||||
</span><span id="line-17"> <span class="p">}</span>
|
||||
</span><span id="line-18"> <span class="p">]</span>
|
||||
</span><span id="line-19"><span class="p">)</span>
|
||||
</span><span id="line-20">
|
||||
</span><span id="line-21"><span class="nb">print</span><span class="p">(</span><span class="n">completion</span><span class="o">.</span><span class="n">choices</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">message</span><span class="o">.</span><span class="n">content</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Streaming Responses:</strong></p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">from</span><span class="w"> </span><span class="nn">openai</span><span class="w"> </span><span class="kn">import</span> <span class="n">OpenAI</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="n">client</span> <span class="o">=</span> <span class="n">OpenAI</span><span class="p">(</span>
|
||||
</span><span id="line-4"> <span class="n">api_key</span><span class="o">=</span><span class="s2">"test-key"</span><span class="p">,</span>
|
||||
</span><span id="line-5"> <span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000/v1"</span>
|
||||
</span><span id="line-6"><span class="p">)</span>
|
||||
</span><span id="line-7">
|
||||
</span><span id="line-8"><span class="n">stream</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-9"> <span class="n">model</span><span class="o">=</span><span class="s2">"gpt-4o-mini"</span><span class="p">,</span>
|
||||
</span><span id="line-10"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-11"> <span class="n">messages</span><span class="o">=</span><span class="p">[</span>
|
||||
</span><span id="line-12"> <span class="p">{</span>
|
||||
</span><span id="line-13"> <span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span>
|
||||
</span><span id="line-14"> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Tell me a short story"</span>
|
||||
</span><span id="line-15"> <span class="p">}</span>
|
||||
</span><span id="line-16"> <span class="p">],</span>
|
||||
</span><span id="line-17"> <span class="n">stream</span><span class="o">=</span><span class="kc">True</span>
|
||||
</span><span id="line-18"><span class="p">)</span>
|
||||
</span><span id="line-19">
|
||||
</span><span id="line-20"><span class="c1"># Print streaming chunks as they arrive</span>
|
||||
</span><span id="line-21"><span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="n">stream</span><span class="p">:</span>
|
||||
</span><span id="line-22"> <span class="k">if</span> <span class="n">chunk</span><span class="o">.</span><span class="n">choices</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">delta</span><span class="o">.</span><span class="n">content</span><span class="p">:</span>
|
||||
</span><span id="line-23"> <span class="nb">print</span><span class="p">(</span><span class="n">chunk</span><span class="o">.</span><span class="n">choices</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">delta</span><span class="o">.</span><span class="n">content</span><span class="p">,</span> <span class="n">end</span><span class="o">=</span><span class="s2">""</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Using with Non-OpenAI Models:</strong></p>
|
||||
<p>The OpenAI SDK can be used with any provider configured in Arch:</p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Using Claude model through OpenAI SDK</span>
|
||||
</span><span id="line-2"><span class="n">completion</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-3"> <span class="n">model</span><span class="o">=</span><span class="s2">"claude-3-5-sonnet-20241022"</span><span class="p">,</span>
|
||||
</span><span id="line-4"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-5"> <span class="n">messages</span><span class="o">=</span><span class="p">[</span>
|
||||
</span><span id="line-6"> <span class="p">{</span>
|
||||
</span><span id="line-7"> <span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span>
|
||||
</span><span id="line-8"> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Explain quantum computing briefly"</span>
|
||||
</span><span id="line-9"> <span class="p">}</span>
|
||||
</span><span id="line-10"> <span class="p">]</span>
|
||||
</span><span id="line-11"><span class="p">)</span>
|
||||
</span><span id="line-12">
|
||||
</span><span id="line-13"><span class="c1"># Using Ollama model through OpenAI SDK</span>
|
||||
</span><span id="line-14"><span class="n">completion</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-15"> <span class="n">model</span><span class="o">=</span><span class="s2">"llama3.1"</span><span class="p">,</span>
|
||||
</span><span id="line-16"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-17"> <span class="n">messages</span><span class="o">=</span><span class="p">[</span>
|
||||
</span><span id="line-18"> <span class="p">{</span>
|
||||
</span><span id="line-19"> <span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span>
|
||||
</span><span id="line-20"> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"What's the capital of France?"</span>
|
||||
</span><span id="line-21"> <span class="p">}</span>
|
||||
</span><span id="line-22"> <span class="p">]</span>
|
||||
</span><span id="line-23"><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="anthropic-python-sdk">
|
||||
<h2>Anthropic (Python) SDK<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#anthropic-python-sdk" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#anthropic-python-sdk'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>The Anthropic SDK works with any provider through Arch’s Anthropic-compatible endpoint.</p>
|
||||
<p><strong>Installation:</strong></p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><code><span id="line-1">pip<span class="w"> </span>install<span class="w"> </span>anthropic
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Basic Usage:</strong></p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">import</span><span class="w"> </span><span class="nn">anthropic</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="c1"># Point to Arch's LLM Gateway</span>
|
||||
</span><span id="line-4"><span class="n">client</span> <span class="o">=</span> <span class="n">anthropic</span><span class="o">.</span><span class="n">Anthropic</span><span class="p">(</span>
|
||||
</span><span id="line-5"> <span class="n">api_key</span><span class="o">=</span><span class="s2">"test-key"</span><span class="p">,</span> <span class="c1"># Can be any value for local testing</span>
|
||||
</span><span id="line-6"> <span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000"</span>
|
||||
</span><span id="line-7"><span class="p">)</span>
|
||||
</span><span id="line-8">
|
||||
</span><span id="line-9"><span class="c1"># Use any model configured in your arch_config.yaml</span>
|
||||
</span><span id="line-10"><span class="n">message</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-11"> <span class="n">model</span><span class="o">=</span><span class="s2">"claude-3-5-sonnet-20241022"</span><span class="p">,</span>
|
||||
</span><span id="line-12"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-13"> <span class="n">messages</span><span class="o">=</span><span class="p">[</span>
|
||||
</span><span id="line-14"> <span class="p">{</span>
|
||||
</span><span id="line-15"> <span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span>
|
||||
</span><span id="line-16"> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Hello, please respond briefly!"</span>
|
||||
</span><span id="line-17"> <span class="p">}</span>
|
||||
</span><span id="line-18"> <span class="p">]</span>
|
||||
</span><span id="line-19"><span class="p">)</span>
|
||||
</span><span id="line-20">
|
||||
</span><span id="line-21"><span class="nb">print</span><span class="p">(</span><span class="n">message</span><span class="o">.</span><span class="n">content</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Streaming Responses:</strong></p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">import</span><span class="w"> </span><span class="nn">anthropic</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="n">client</span> <span class="o">=</span> <span class="n">anthropic</span><span class="o">.</span><span class="n">Anthropic</span><span class="p">(</span>
|
||||
</span><span id="line-4"> <span class="n">api_key</span><span class="o">=</span><span class="s2">"test-key"</span><span class="p">,</span>
|
||||
</span><span id="line-5"> <span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000"</span>
|
||||
</span><span id="line-6"><span class="p">)</span>
|
||||
</span><span id="line-7">
|
||||
</span><span id="line-8"><span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">stream</span><span class="p">(</span>
|
||||
</span><span id="line-9"> <span class="n">model</span><span class="o">=</span><span class="s2">"claude-3-5-sonnet-20241022"</span><span class="p">,</span>
|
||||
</span><span id="line-10"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-11"> <span class="n">messages</span><span class="o">=</span><span class="p">[</span>
|
||||
</span><span id="line-12"> <span class="p">{</span>
|
||||
</span><span id="line-13"> <span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span>
|
||||
</span><span id="line-14"> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Tell me about artificial intelligence"</span>
|
||||
</span><span id="line-15"> <span class="p">}</span>
|
||||
</span><span id="line-16"> <span class="p">]</span>
|
||||
</span><span id="line-17"><span class="p">)</span> <span class="k">as</span> <span class="n">stream</span><span class="p">:</span>
|
||||
</span><span id="line-18"> <span class="c1"># Print text deltas as they arrive</span>
|
||||
</span><span id="line-19"> <span class="k">for</span> <span class="n">text</span> <span class="ow">in</span> <span class="n">stream</span><span class="o">.</span><span class="n">text_stream</span><span class="p">:</span>
|
||||
</span><span id="line-20"> <span class="nb">print</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">end</span><span class="o">=</span><span class="s2">""</span><span class="p">)</span>
|
||||
</span><span id="line-21">
|
||||
</span><span id="line-22"> <span class="c1"># Get final assembled message</span>
|
||||
</span><span id="line-23"> <span class="n">final_message</span> <span class="o">=</span> <span class="n">stream</span><span class="o">.</span><span class="n">get_final_message</span><span class="p">()</span>
|
||||
</span><span id="line-24"> <span class="n">final_text</span> <span class="o">=</span> <span class="s2">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">block</span><span class="o">.</span><span class="n">text</span> <span class="k">for</span> <span class="n">block</span> <span class="ow">in</span> <span class="n">final_message</span><span class="o">.</span><span class="n">content</span> <span class="k">if</span> <span class="n">block</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s2">"text"</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Using with Non-Anthropic Models:</strong></p>
|
||||
<p>The Anthropic SDK can be used with any provider configured in Arch:</p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Using OpenAI model through Anthropic SDK</span>
|
||||
</span><span id="line-2"><span class="n">message</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-3"> <span class="n">model</span><span class="o">=</span><span class="s2">"gpt-4o-mini"</span><span class="p">,</span>
|
||||
</span><span id="line-4"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-5"> <span class="n">messages</span><span class="o">=</span><span class="p">[</span>
|
||||
</span><span id="line-6"> <span class="p">{</span>
|
||||
</span><span id="line-7"> <span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span>
|
||||
</span><span id="line-8"> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Explain machine learning in simple terms"</span>
|
||||
</span><span id="line-9"> <span class="p">}</span>
|
||||
</span><span id="line-10"> <span class="p">]</span>
|
||||
</span><span id="line-11"><span class="p">)</span>
|
||||
</span><span id="line-12">
|
||||
</span><span id="line-13"><span class="c1"># Using Ollama model through Anthropic SDK</span>
|
||||
</span><span id="line-14"><span class="n">message</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-15"> <span class="n">model</span><span class="o">=</span><span class="s2">"llama3.1"</span><span class="p">,</span>
|
||||
</span><span id="line-16"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-17"> <span class="n">messages</span><span class="o">=</span><span class="p">[</span>
|
||||
</span><span id="line-18"> <span class="p">{</span>
|
||||
</span><span id="line-19"> <span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span>
|
||||
</span><span id="line-20"> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"What is Python programming?"</span>
|
||||
</span><span id="line-21"> <span class="p">}</span>
|
||||
</span><span id="line-22"> <span class="p">]</span>
|
||||
</span><span id="line-23"><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="curl-examples">
|
||||
<h2>cURL Examples<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#curl-examples" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#curl-examples'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>For direct HTTP requests or integration with any programming language:</p>
|
||||
<p><strong>OpenAI-Compatible Endpoint:</strong></p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Basic request</span>
|
||||
</span><span id="line-2">curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://127.0.0.1:12000/v1/chat/completions<span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-3"><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-4"><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer test-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-5"><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
</span><span id="line-6"><span class="s1"> "model": "gpt-4o-mini",</span>
|
||||
</span><span id="line-7"><span class="s1"> "messages": [</span>
|
||||
</span><span id="line-8"><span class="s1"> {"role": "user", "content": "Hello!"}</span>
|
||||
</span><span id="line-9"><span class="s1"> ],</span>
|
||||
</span><span id="line-10"><span class="s1"> "max_tokens": 50</span>
|
||||
</span><span id="line-11"><span class="s1"> }'</span>
|
||||
</span><span id="line-12">
|
||||
</span><span id="line-13"><span class="c1"># Using model aliases</span>
|
||||
</span><span id="line-14">curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://127.0.0.1:12000/v1/chat/completions<span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-15"><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-16"><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
</span><span id="line-17"><span class="s1"> "model": "fast-model",</span>
|
||||
</span><span id="line-18"><span class="s1"> "messages": [</span>
|
||||
</span><span id="line-19"><span class="s1"> {"role": "user", "content": "Summarize this text..."}</span>
|
||||
</span><span id="line-20"><span class="s1"> ],</span>
|
||||
</span><span id="line-21"><span class="s1"> "max_tokens": 100</span>
|
||||
</span><span id="line-22"><span class="s1"> }'</span>
|
||||
</span><span id="line-23">
|
||||
</span><span id="line-24"><span class="c1"># Streaming request</span>
|
||||
</span><span id="line-25">curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://127.0.0.1:12000/v1/chat/completions<span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-26"><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-27"><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
</span><span id="line-28"><span class="s1"> "model": "gpt-4o-mini",</span>
|
||||
</span><span id="line-29"><span class="s1"> "messages": [</span>
|
||||
</span><span id="line-30"><span class="s1"> {"role": "user", "content": "Tell me a story"}</span>
|
||||
</span><span id="line-31"><span class="s1"> ],</span>
|
||||
</span><span id="line-32"><span class="s1"> "stream": true,</span>
|
||||
</span><span id="line-33"><span class="s1"> "max_tokens": 200</span>
|
||||
</span><span id="line-34"><span class="s1"> }'</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Anthropic-Compatible Endpoint:</strong></p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Basic request</span>
|
||||
</span><span id="line-2">curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://127.0.0.1:12000/v1/messages<span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-3"><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-4"><span class="w"> </span>-H<span class="w"> </span><span class="s2">"x-api-key: test-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-5"><span class="w"> </span>-H<span class="w"> </span><span class="s2">"anthropic-version: 2023-06-01"</span><span class="w"> </span><span class="se">\</span>
|
||||
</span><span id="line-6"><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
</span><span id="line-7"><span class="s1"> "model": "claude-3-5-sonnet-20241022",</span>
|
||||
</span><span id="line-8"><span class="s1"> "max_tokens": 50,</span>
|
||||
</span><span id="line-9"><span class="s1"> "messages": [</span>
|
||||
</span><span id="line-10"><span class="s1"> {"role": "user", "content": "Hello Claude!"}</span>
|
||||
</span><span id="line-11"><span class="s1"> ]</span>
|
||||
</span><span id="line-12"><span class="s1"> }'</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="cross-client-compatibility">
|
||||
<h2>Cross-Client Compatibility<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#cross-client-compatibility" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#cross-client-compatibility'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>One of Arch’s key features is cross-client compatibility. You can:</p>
|
||||
<p><strong>Use OpenAI SDK with Claude Models:</strong></p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># OpenAI client calling Claude model</span>
|
||||
</span><span id="line-2"><span class="kn">from</span><span class="w"> </span><span class="nn">openai</span><span class="w"> </span><span class="kn">import</span> <span class="n">OpenAI</span>
|
||||
</span><span id="line-3">
|
||||
</span><span id="line-4"><span class="n">client</span> <span class="o">=</span> <span class="n">OpenAI</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000/v1"</span><span class="p">,</span> <span class="n">api_key</span><span class="o">=</span><span class="s2">"test"</span><span class="p">)</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-7"> <span class="n">model</span><span class="o">=</span><span class="s2">"claude-3-5-sonnet-20241022"</span><span class="p">,</span> <span class="c1"># Claude model</span>
|
||||
</span><span id="line-8"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Hello"</span><span class="p">}]</span>
|
||||
</span><span id="line-9"><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Use Anthropic SDK with OpenAI Models:</strong></p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Anthropic client calling OpenAI model</span>
|
||||
</span><span id="line-2"><span class="kn">import</span><span class="w"> </span><span class="nn">anthropic</span>
|
||||
</span><span id="line-3">
|
||||
</span><span id="line-4"><span class="n">client</span> <span class="o">=</span> <span class="n">anthropic</span><span class="o">.</span><span class="n">Anthropic</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000"</span><span class="p">,</span> <span class="n">api_key</span><span class="o">=</span><span class="s2">"test"</span><span class="p">)</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-7"> <span class="n">model</span><span class="o">=</span><span class="s2">"gpt-4o-mini"</span><span class="p">,</span> <span class="c1"># OpenAI model</span>
|
||||
</span><span id="line-8"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-9"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Hello"</span><span class="p">}]</span>
|
||||
</span><span id="line-10"><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Mix and Match with</strong> <a class="reference internal" href="model_aliases.html#model-aliases"><span class="std std-ref">Model Aliases</span></a>:</p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Same code works with different underlying models</span>
|
||||
</span><span id="line-2"><span class="k">def</span><span class="w"> </span><span class="nf">ask_question</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">question</span><span class="p">):</span>
|
||||
</span><span id="line-3"> <span class="k">return</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-4"> <span class="n">model</span><span class="o">=</span><span class="s2">"reasoning-model"</span><span class="p">,</span> <span class="c1"># Alias could point to any provider</span>
|
||||
</span><span id="line-5"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="n">question</span><span class="p">}]</span>
|
||||
</span><span id="line-6"> <span class="p">)</span>
|
||||
</span><span id="line-7">
|
||||
</span><span id="line-8"><span class="c1"># Works regardless of what "reasoning-model" actually points to</span>
|
||||
</span><span id="line-9"><span class="n">openai_client</span> <span class="o">=</span> <span class="n">OpenAI</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000/v1"</span><span class="p">,</span> <span class="n">api_key</span><span class="o">=</span><span class="s2">"test"</span><span class="p">)</span>
|
||||
</span><span id="line-10"><span class="n">response</span> <span class="o">=</span> <span class="n">ask_question</span><span class="p">(</span><span class="n">openai_client</span><span class="p">,</span> <span class="s2">"Solve this math problem..."</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="error-handling">
|
||||
<h2>Error Handling<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#error-handling" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#error-handling'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>OpenAI SDK Error Handling:</strong></p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">from</span><span class="w"> </span><span class="nn">openai</span><span class="w"> </span><span class="kn">import</span> <span class="n">OpenAI</span>
|
||||
</span><span id="line-2"><span class="kn">import</span><span class="w"> </span><span class="nn">openai</span>
|
||||
</span><span id="line-3">
|
||||
</span><span id="line-4"><span class="n">client</span> <span class="o">=</span> <span class="n">OpenAI</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000/v1"</span><span class="p">,</span> <span class="n">api_key</span><span class="o">=</span><span class="s2">"test"</span><span class="p">)</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="k">try</span><span class="p">:</span>
|
||||
</span><span id="line-7"> <span class="n">completion</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-8"> <span class="n">model</span><span class="o">=</span><span class="s2">"nonexistent-model"</span><span class="p">,</span>
|
||||
</span><span id="line-9"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Hello"</span><span class="p">}]</span>
|
||||
</span><span id="line-10"> <span class="p">)</span>
|
||||
</span><span id="line-11"><span class="k">except</span> <span class="n">openai</span><span class="o">.</span><span class="n">NotFoundError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
</span><span id="line-12"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Model not found: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
</span><span id="line-13"><span class="k">except</span> <span class="n">openai</span><span class="o">.</span><span class="n">APIError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
</span><span id="line-14"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"API error: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Anthropic SDK Error Handling:</strong></p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">import</span><span class="w"> </span><span class="nn">anthropic</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="n">client</span> <span class="o">=</span> <span class="n">anthropic</span><span class="o">.</span><span class="n">Anthropic</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000"</span><span class="p">,</span> <span class="n">api_key</span><span class="o">=</span><span class="s2">"test"</span><span class="p">)</span>
|
||||
</span><span id="line-4">
|
||||
</span><span id="line-5"><span class="k">try</span><span class="p">:</span>
|
||||
</span><span id="line-6"> <span class="n">message</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-7"> <span class="n">model</span><span class="o">=</span><span class="s2">"nonexistent-model"</span><span class="p">,</span>
|
||||
</span><span id="line-8"> <span class="n">max_tokens</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
</span><span id="line-9"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Hello"</span><span class="p">}]</span>
|
||||
</span><span id="line-10"> <span class="p">)</span>
|
||||
</span><span id="line-11"><span class="k">except</span> <span class="n">anthropic</span><span class="o">.</span><span class="n">NotFoundError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
</span><span id="line-12"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Model not found: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
</span><span id="line-13"><span class="k">except</span> <span class="n">anthropic</span><span class="o">.</span><span class="n">APIError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
</span><span id="line-14"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"API error: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="best-practices">
|
||||
<h2>Best Practices<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#best-practices" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#best-practices'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>Use</strong> <a class="reference internal" href="model_aliases.html#model-aliases"><span class="std std-ref">Model Aliases</span></a>:
|
||||
Instead of hardcoding provider-specific model names, use semantic aliases:</p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Good - uses semantic alias</span>
|
||||
</span><span id="line-2"><span class="n">model</span> <span class="o">=</span> <span class="s2">"fast-model"</span>
|
||||
</span><span id="line-3">
|
||||
</span><span id="line-4"><span class="c1"># Less ideal - hardcoded provider model</span>
|
||||
</span><span id="line-5"><span class="n">model</span> <span class="o">=</span> <span class="s2">"openai/gpt-4o-mini"</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Environment-Based Configuration:</strong>
|
||||
Use different <a class="reference internal" href="model_aliases.html#model-aliases"><span class="std std-ref">model aliases</span></a> for different environments:</p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">import</span><span class="w"> </span><span class="nn">os</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="c1"># Development uses cheaper/faster models</span>
|
||||
</span><span id="line-4"><span class="n">model</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getenv</span><span class="p">(</span><span class="s2">"MODEL_ALIAS"</span><span class="p">,</span> <span class="s2">"dev.chat.v1"</span><span class="p">)</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-7"> <span class="n">model</span><span class="o">=</span><span class="n">model</span><span class="p">,</span>
|
||||
</span><span id="line-8"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Hello"</span><span class="p">}]</span>
|
||||
</span><span id="line-9"><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Graceful Fallbacks:</strong>
|
||||
Implement fallback logic for better reliability:</p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="k">def</span><span class="w"> </span><span class="nf">chat_with_fallback</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">messages</span><span class="p">,</span> <span class="n">primary_model</span><span class="o">=</span><span class="s2">"smart-model"</span><span class="p">,</span> <span class="n">fallback_model</span><span class="o">=</span><span class="s2">"fast-model"</span><span class="p">):</span>
|
||||
</span><span id="line-2"> <span class="k">try</span><span class="p">:</span>
|
||||
</span><span id="line-3"> <span class="k">return</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">model</span><span class="o">=</span><span class="n">primary_model</span><span class="p">,</span> <span class="n">messages</span><span class="o">=</span><span class="n">messages</span><span class="p">)</span>
|
||||
</span><span id="line-4"> <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
</span><span id="line-5"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Primary model failed, trying fallback: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
</span><span id="line-6"> <span class="k">return</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">model</span><span class="o">=</span><span class="n">fallback_model</span><span class="p">,</span> <span class="n">messages</span><span class="o">=</span><span class="n">messages</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="see-also">
|
||||
<h2>See Also<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#see-also" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#see-also'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<ul class="simple">
|
||||
<li><p><a class="reference internal" href="supported_providers.html#supported-providers"><span class="std std-ref">Supported Providers & Configuration</span></a> - Configure your providers and see available models</p></li>
|
||||
<li><p><a class="reference internal" href="model_aliases.html#model-aliases"><span class="std std-ref">Model Aliases</span></a> - Create semantic model names</p></li>
|
||||
<li><p><a class="reference internal" href="../../guides/llm_router.html#llm-router"><span class="std std-ref">LLM Routing</span></a> - Intelligent routing capabilities</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="supported_providers.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Supported Providers & Configuration
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="model_aliases.html">
|
||||
Model Aliases
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
|
||||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
<li><a :data-current="activeSection === '#supported-clients'" class="reference internal" href="#supported-clients">Supported Clients</a></li>
|
||||
<li><a :data-current="activeSection === '#gateway-endpoints'" class="reference internal" href="#gateway-endpoints">Gateway Endpoints</a></li>
|
||||
<li><a :data-current="activeSection === '#openai-python-sdk'" class="reference internal" href="#openai-python-sdk">OpenAI (Python) SDK</a></li>
|
||||
<li><a :data-current="activeSection === '#anthropic-python-sdk'" class="reference internal" href="#anthropic-python-sdk">Anthropic (Python) SDK</a></li>
|
||||
<li><a :data-current="activeSection === '#curl-examples'" class="reference internal" href="#curl-examples">cURL Examples</a></li>
|
||||
<li><a :data-current="activeSection === '#cross-client-compatibility'" class="reference internal" href="#cross-client-compatibility">Cross-Client Compatibility</a></li>
|
||||
<li><a :data-current="activeSection === '#error-handling'" class="reference internal" href="#error-handling">Error Handling</a></li>
|
||||
<li><a :data-current="activeSection === '#best-practices'" class="reference internal" href="#best-practices">Best Practices</a></li>
|
||||
<li><a :data-current="activeSection === '#see-also'" class="reference internal" href="#see-also">See Also</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</aside>
|
||||
</main>
|
||||
</div>
|
||||
</div><footer class="py-6 border-t border-border md:py-0">
|
||||
<div class="container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row">
|
||||
<div class="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
|
||||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2025, Katanemo Labs, Inc. Last updated: Sep 19, 2025.</p>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
<script src="../../_static/documentation_options.js?v=a9d256b5"></script>
|
||||
<script src="../../_static/doctools.js?v=9bcbadda"></script>
|
||||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script defer="defer" src="../../_static/theme.js?v=073f68d9"></script>
|
||||
<script src="../../_static/design-tabs.js?v=f930bc37"></script>
|
||||
</body>
|
||||
</html>
|
||||
314
concepts/llm_providers/llm_providers.html
Executable file
314
concepts/llm_providers/llm_providers.html
Executable file
|
|
@ -0,0 +1,314 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' && window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val => localStorage.setItem('darkMode', val))">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
|
||||
<meta charset="utf-8"/>
|
||||
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
|
||||
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport"/>
|
||||
<title>LLM Providers | Arch Docs v0.3.12</title>
|
||||
<meta content="LLM Providers | Arch Docs v0.3.12" property="og:title"/>
|
||||
<meta content="LLM Providers | Arch Docs v0.3.12" name="twitter:title"/>
|
||||
<link href="../../_static/pygments.css?v=466e7b45" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/theme.css?v=42baaae4" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/awesome-sphinx-design.css?v=15e0fffa" rel="stylesheet" type="text/css"/>
|
||||
<link href="./docs/concepts/llm_providers/llm_providers.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="supported_providers.html" rel="next" title="Supported Providers & Configuration"/>
|
||||
<link href="../tech_overview/error_target.html" rel="prev" title="Error Target"/>
|
||||
<script>
  // Prevent a flash of the wrong theme: decide on the `dark` class before
  // first paint, ahead of the deferred theme script.
  const userPreference = localStorage.getItem('darkMode');
  let mode;
  // Mirror the `:class` binding on <html>: a stored 'dark' always wins, and the
  // system colour scheme is consulted only when nothing is stored yet or the
  // stored value is 'system'. A stored 'light' must not be overridden by a
  // dark system scheme, otherwise the page flashes dark until the binding
  // removes the class again.
  if (
    userPreference === 'dark' ||
    ((!userPreference || userPreference === 'system') &&
      window.matchMedia('(prefers-color-scheme: dark)').matches)
  ) {
    mode = 'dark';
    document.documentElement.classList.add('dark');
  } else {
    mode = 'light';
  }
  // First visit: persist the detected mode so later loads reuse it.
  if (!userPreference) {localStorage.setItem('darkMode', mode)}
</script>
|
||||
</head>
|
||||
<body :class="{ 'overflow-hidden': showSidebar }" class="min-h-screen font-sans antialiased bg-background text-foreground" x-data="{ showSidebar: false, showScrollTop: false }">
|
||||
<div @click.self="showSidebar = false" class="fixed inset-0 z-50 overflow-hidden bg-background/80 backdrop-blur-sm md:hidden" x-cloak="" x-show="showSidebar"></div><div class="relative flex flex-col min-h-screen" id="page"><a class="absolute top-0 left-0 z-[100] block bg-background p-4 text-xl transition -translate-x-full opacity-0 focus:translate-x-0 focus:opacity-100" href="#content">
|
||||
Skip to content
|
||||
</a><header class="sticky top-0 z-40 w-full border-b shadow-sm border-border supports-backdrop-blur:bg-background/60 bg-background/95 backdrop-blur"><div class="container flex items-center h-14">
|
||||
<div class="hidden mr-4 md:flex">
|
||||
<a class="flex items-center mr-6" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
|
||||
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
|
||||
</svg>
|
||||
<span class="sr-only">Toggle navigation menu</span>
|
||||
</button>
|
||||
<div class="flex items-center justify-between flex-1 space-x-2 sm:space-x-4 md:justify-end">
|
||||
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../../search.html" class="relative flex items-center group" id="searchbox" method="get">
|
||||
<input aria-label="Search the docs" class="inline-flex items-center font-medium transition-colors bg-transparent focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 ring-offset-background border border-input hover:bg-accent focus:bg-accent hover:text-accent-foreground focus:text-accent-foreground hover:placeholder-accent-foreground py-2 px-4 relative h-9 w-full justify-start rounded-[0.5rem] text-sm text-muted-foreground sm:pr-12 md:w-40 lg:w-64" id="search-input" name="q" placeholder="Search ..." type="search" x-ref="search"/>
|
||||
<kbd class="pointer-events-none absolute right-1.5 top-2 hidden h-5 select-none text-muted-foreground items-center gap-1 rounded border border-border bg-muted px-1.5 font-mono text-[10px] font-medium opacity-100 sm:flex group-hover:bg-accent group-hover:text-accent-foreground">
|
||||
<span class="text-xs">⌘</span>
|
||||
K
|
||||
</kbd>
|
||||
</form>
|
||||
</div>
|
||||
<nav class="flex items-center space-x-1">
|
||||
<a href="https://github.com/katanemo/arch" rel="noopener nofollow" title="Visit repository on GitHub">
|
||||
<div class="inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md disabled:opacity-50 disabled:pointer-events-none hover:bg-accent hover:text-accent-foreground h-9 w-9">
|
||||
<svg fill="currentColor" height="26px" style="margin-top:-2px;display:inline" viewbox="0 0 45 44" xmlns="http://www.w3.org/2000/svg"><path clip-rule="evenodd" d="M22.477.927C10.485.927.76 10.65.76 22.647c0 9.596 6.223 17.736 14.853 20.608 1.087.2 1.483-.47 1.483-1.047 0-.516-.019-1.881-.03-3.693-6.04 1.312-7.315-2.912-7.315-2.912-.988-2.51-2.412-3.178-2.412-3.178-1.972-1.346.149-1.32.149-1.32 2.18.154 3.327 2.24 3.327 2.24 1.937 3.318 5.084 2.36 6.321 1.803.197-1.403.759-2.36 1.379-2.903-4.823-.548-9.894-2.412-9.894-10.734 0-2.37.847-4.31 2.236-5.828-.224-.55-.969-2.759.214-5.748 0 0 1.822-.584 5.972 2.226 1.732-.482 3.59-.722 5.437-.732 1.845.01 3.703.25 5.437.732 4.147-2.81 5.967-2.226 5.967-2.226 1.185 2.99.44 5.198.217 5.748 1.392 1.517 2.232 3.457 2.232 5.828 0 8.344-5.078 10.18-9.916 10.717.779.67 1.474 1.996 1.474 4.021 0 2.904-.027 5.247-.027 5.96 0 .58.392 1.256 1.493 1.044C37.981 40.375 44.2 32.24 44.2 22.647c0-11.996-9.726-21.72-21.722-21.72" fill="currentColor" fill-rule="evenodd"></path></svg>
|
||||
</div>
|
||||
</a>
|
||||
<button @click="darkMode = darkMode === 'light' ? 'dark' : 'light'" aria-label="Color theme switcher" class="relative inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md hover:bg-accent hover:text-accent-foreground h-9 w-9" type="button">
|
||||
<svg class="absolute transition-all scale-100 rotate-0 dark:-rotate-90 dark:scale-0" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 685q45.456 0 77.228-31.772Q589 621.456 589 576q0-45.456-31.772-77.228Q525.456 467 480 467q-45.456 0-77.228 31.772Q371 530.544 371 576q0 45.456 31.772 77.228Q434.544 685 480 685Zm0 91q-83 0-141.5-58.5T280 576q0-83 58.5-141.5T480 376q83 0 141.5 58.5T680 576q0 83-58.5 141.5T480 776ZM80 621.5q-19.152 0-32.326-13.174T34.5 576q0-19.152 13.174-32.326T80 530.5h80q19.152 0 32.326 13.174T205.5 576q0 19.152-13.174 32.326T160 621.5H80Zm720 0q-19.152 0-32.326-13.174T754.5 576q0-19.152 13.174-32.326T800 530.5h80q19.152 0 32.326 13.174T925.5 576q0 19.152-13.174 32.326T880 621.5h-80Zm-320-320q-19.152 0-32.326-13.174T434.5 256v-80q0-19.152 13.174-32.326T480 130.5q19.152 0 32.326 13.174T525.5 176v80q0 19.152-13.174 32.326T480 301.5Zm0 720q-19.152 0-32.326-13.17Q434.5 995.152 434.5 976v-80q0-19.152 13.174-32.326T480 850.5q19.152 0 32.326 13.174T525.5 896v80q0 19.152-13.174 32.33-13.174 13.17-32.326 13.17ZM222.174 382.065l-43-42Q165.5 327.391 166 308.239t13.174-33.065q13.435-13.674 32.587-13.674t32.065 13.674l42.239 43q12.674 13.435 12.555 31.706-.12 18.272-12.555 31.946-12.674 13.674-31.445 13.413-18.772-.261-32.446-13.174Zm494 494.761-42.239-43q-12.674-13.435-12.674-32.087t12.674-31.565Q686.609 756.5 705.38 757q18.772.5 32.446 13.174l43 41.761Q794.5 824.609 794 843.761t-13.174 33.065Q767.391 890.5 748.239 890.5t-32.065-13.674Zm-42-494.761Q660.5 369.391 661 350.62q.5-18.772 13.174-32.446l41.761-43Q728.609 261.5 747.761 262t33.065 13.174q13.674 13.435 13.674 32.587t-13.674 32.065l-43 42.239q-13.435 12.674-31.706 12.555-18.272-.12-31.946-12.555Zm-495 494.761Q165.5 863.391 165.5 844.239t13.674-32.065l43-42.239q13.435-12.674 32.087-12.674t31.565 12.674Q299.5 782.609 299 801.38q-.5 18.772-13.174 32.446l-41.761 43Q231.391 890.5 212.239 890t-33.065-13.174ZM480 576Z"></path>
|
||||
</svg>
|
||||
<svg class="absolute transition-all scale-0 rotate-90 dark:rotate-0 dark:scale-100" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 936q-151 0-255.5-104.5T120 576q0-138 90-239.5T440 218q25-3 39 18t-1 44q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q21-14 43-1.5t19 39.5q-14 138-117.5 229T480 936Zm0-80q88 0 158-48.5T740 681q-20 5-40 8t-40 3q-123 0-209.5-86.5T364 396q0-20 3-40t8-40q-78 32-126.5 102T200 576q0 116 82 198t198 82Zm-10-270Z"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
<div class="flex-1"><div class="container flex-1 items-start md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full shrink-0 overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
|
||||
<a class="!justify-start text-sm md:!hidden bg-background" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a>
|
||||
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0 h-[calc(100vh-8rem)] md:h-auto">
|
||||
<div class="overflow-y-auto h-full w-full relative pr-6">
|
||||
|
||||
<script async="" src="https://www.googletagmanager.com/gtag/js?id=G-K2LXXSX6HB"></script>
|
||||
<script>
  // Bootstrap for the Google tag loaded asynchronously by the preceding
  // <script> element: reuse an existing data layer or start an empty one.
  window.dataLayer = window.dataLayer || [];

  // Queue each call by pushing the function's own `arguments` object onto
  // the data layer.
  function gtag() {
    dataLayer.push(arguments);
  }

  // Record the load timestamp, then register the measurement ID.
  gtag('js', new Date());
  gtag('config', 'G-K2LXXSX6HB');
</script>
|
||||
<nav class="table w-full min-w-full my-6 lg:my-8">
|
||||
<p class="caption" role="heading"><span class="caption-text">Get Started</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/overview.html">Overview</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/intro_to_arch.html">Intro to Arch</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html">Quickstart</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html#next-steps">Next Steps</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Concepts</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../tech_overview/tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1 current" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="current reference internal expandable" href="#">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/prompt_guard.html">Prompt Guard</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/agent_routing.html">Agent Routing and Hand Off</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/function_calling.html">Function Calling</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/llm_router.html">LLM Routing</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../guides/observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/monitoring.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/access_logging.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Build with Arch</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/agent.html">Agentic Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/rag.html">RAG Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/multi_turn.html">Multi-Turn</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
<button @click="showSidebar = false" aria-label="Close navigation menu" class="absolute md:hidden right-4 top-4 rounded-sm opacity-70 transition-opacity hover:opacity-100" type="button">
|
||||
<svg class="h-4 w-4" fill="currentColor" height="24" stroke="none" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 632 284 828q-11 11-28 11t-28-11q-11-11-11-28t11-28l196-196-196-196q-11-11-11-28t11-28q11-11 28-11t28 11l196 196 196-196q11-11 28-11t28 11q11 11 11 28t-11 28L536 576l196 196q11 11 11 28t-11 28q-11 11-28 11t-28-11L480 632Z"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</aside>
|
||||
<main class="relative py-6 lg:gap-10 lg:py-8 xl:grid xl:grid-cols-[1fr_300px]">
|
||||
<div class="w-full min-w-0 mx-auto">
|
||||
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
|
||||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../../index.html">
|
||||
<span class="hidden md:inline">Arch Docs v0.3.12</span>
|
||||
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
|
||||
</svg>
|
||||
</a>
|
||||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">LLM Providers</span>
|
||||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="llm-providers">
|
||||
<span id="id1"></span><h1>LLM Providers<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#llm-providers"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p><strong>LLM Providers</strong> are a top-level primitive in Arch, helping developers centrally define, secure, observe,
|
||||
and manage the usage of their LLMs. Arch builds on Envoy’s reliable <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/upstream/cluster_manager" rel="nofollow noopener">cluster subsystem<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>
|
||||
to manage egress traffic to LLMs, which includes intelligent routing, retry and fail-over mechanisms,
|
||||
ensuring high availability and fault tolerance. This abstraction also enables developers to seamlessly
|
||||
switch between LLM providers or upgrade LLM versions, simplifying the integration and scaling of LLMs
|
||||
across applications.</p>
|
||||
<p>Today, we are enabling you to connect to 11+ different AI providers through a unified interface with advanced routing and management capabilities.
|
||||
Whether you’re using OpenAI, Anthropic, Azure OpenAI, local Ollama models, or any OpenAI-compatible provider, Arch provides seamless integration with enterprise-grade features.</p>
|
||||
<section id="core-capabilities">
|
||||
<h2>Core Capabilities<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#core-capabilities" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#core-capabilities'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>Multi-Provider Support</strong>
|
||||
Connect to any combination of providers simultaneously (see <a class="reference internal" href="supported_providers.html#supported-providers"><span class="std std-ref">Supported Providers & Configuration</span></a> for full details):</p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>First-Class Providers</strong>: Native integrations with OpenAI, Anthropic, DeepSeek, Mistral, Groq, Google Gemini, Together AI, xAI, Azure OpenAI, and Ollama</p></li>
|
||||
<li><p><strong>OpenAI-Compatible Providers</strong>: Any provider implementing the OpenAI Chat Completions API standard</p></li>
|
||||
</ul>
|
||||
<p><strong>Intelligent Routing</strong>
|
||||
Three powerful routing approaches to optimize model selection:</p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>Model-based Routing</strong>: Direct routing to specific models using provider/model names (see <a class="reference internal" href="supported_providers.html#supported-providers"><span class="std std-ref">Supported Providers & Configuration</span></a>)</p></li>
|
||||
<li><p><strong>Alias-based Routing</strong>: Semantic routing using custom aliases (see <a class="reference internal" href="model_aliases.html#model-aliases"><span class="std std-ref">Model Aliases</span></a>)</p></li>
|
||||
<li><p><strong>Preference-aligned Routing</strong>: Intelligent routing using the Arch-Router model (see <a class="reference internal" href="../../guides/llm_router.html#preference-aligned-routing"><span class="std std-ref">Preference-aligned Routing (Arch-Router)</span></a>)</p></li>
|
||||
</ul>
|
||||
<p><strong>Unified Client Interface</strong>
|
||||
Use your preferred client library without changing existing code (see <a class="reference internal" href="client_libraries.html#client-libraries"><span class="std std-ref">Client Libraries</span></a> for details):</p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>OpenAI Python SDK</strong>: Full compatibility with all providers</p></li>
|
||||
<li><p><strong>Anthropic Python SDK</strong>: Native support with cross-provider capabilities</p></li>
|
||||
<li><p><strong>cURL & HTTP Clients</strong>: Direct REST API access for any programming language</p></li>
|
||||
<li><p><strong>Custom Integrations</strong>: Standard HTTP interfaces for seamless integration</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="key-benefits">
|
||||
<h2>Key Benefits<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#key-benefits" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#key-benefits'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<ul class="simple">
|
||||
<li><p><strong>Provider Flexibility</strong>: Switch between providers without changing client code</p></li>
|
||||
<li><p><strong>Three Routing Methods</strong>: Choose from model-based, alias-based, or preference-aligned routing (using <a class="reference external" href="https://huggingface.co/katanemo/Arch-Router-1.5B" rel="nofollow noopener">Arch-Router-1.5B<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>) strategies</p></li>
|
||||
<li><p><strong>Cost Optimization</strong>: Route requests to cost-effective models based on complexity</p></li>
|
||||
<li><p><strong>Performance Optimization</strong>: Use fast models for simple tasks, powerful models for complex reasoning</p></li>
|
||||
<li><p><strong>Environment Management</strong>: Configure different models for different environments</p></li>
|
||||
<li><p><strong>Future-Proof</strong>: Easy to add new providers and upgrade models</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="common-use-cases">
|
||||
<h2>Common Use Cases<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#common-use-cases" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#common-use-cases'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>Development Teams</strong></p>
|
||||
<ul class="simple">
|
||||
<li><p>Use aliases like <code class="docutils literal notranslate"><span class="pre">dev.chat.v1</span></code> and <code class="docutils literal notranslate"><span class="pre">prod.chat.v1</span></code> for environment-specific models</p></li>
|
||||
<li><p>Route simple queries to fast/cheap models, complex tasks to powerful models</p></li>
|
||||
<li><p>Test new models safely using canary deployments (coming soon)</p></li>
|
||||
</ul>
|
||||
<p><strong>Production Applications</strong></p>
|
||||
<ul class="simple">
|
||||
<li><p>Implement fallback strategies across multiple providers for reliability</p></li>
|
||||
<li><p>Use intelligent routing to optimize cost and performance automatically</p></li>
|
||||
<li><p>Monitor usage patterns and model performance across providers</p></li>
|
||||
</ul>
|
||||
<p><strong>Enterprise Deployments</strong></p>
|
||||
<ul class="simple">
|
||||
<li><p>Connect to both cloud providers and on-premises models (Ollama, custom deployments)</p></li>
|
||||
<li><p>Apply consistent security and governance policies across all providers</p></li>
|
||||
<li><p>Scale across regions using different provider endpoints</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="advanced-features">
|
||||
<h2>Advanced Features<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#advanced-features" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#advanced-features'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<ul class="simple">
|
||||
<li><p><a class="reference internal" href="../../guides/llm_router.html#preference-aligned-routing"><span class="std std-ref">Preference-aligned Routing (Arch-Router)</span></a> - Learn about preference-aligned dynamic routing and intelligent model selection</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="getting-started">
|
||||
<h2>Getting Started<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#getting-started" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#getting-started'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>Dive into specific areas based on your needs:</p>
|
||||
<div class="toctree-wrapper compound">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="supported_providers.html">Supported Providers & Configuration</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html#configuration-structure">Configuration Structure</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html#provider-categories">Provider Categories</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html#supported-api-endpoints">Supported API Endpoints</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html#first-class-providers">First-Class Providers</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html#providers-requiring-base-url">Providers Requiring Base URL</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html#advanced-configuration">Advanced Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html#model-selection-guidelines">Model Selection Guidelines</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html#see-also">See Also</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="client_libraries.html">Client Libraries</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html#supported-clients">Supported Clients</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html#gateway-endpoints">Gateway Endpoints</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html#openai-python-sdk">OpenAI (Python) SDK</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html#anthropic-python-sdk">Anthropic (Python) SDK</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html#curl-examples">cURL Examples</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html#cross-client-compatibility">Cross-Client Compatibility</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html#error-handling">Error Handling</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html#best-practices">Best Practices</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html#see-also">See Also</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="model_aliases.html">Model Aliases</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_aliases.html#basic-configuration">Basic Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_aliases.html#using-aliases">Using Aliases</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_aliases.html#naming-best-practices">Naming Best Practices</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_aliases.html#advanced-features-coming-soon">Advanced Features (Coming Soon)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_aliases.html#validation-rules">Validation Rules</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_aliases.html#see-also">See Also</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="../tech_overview/error_target.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Error Target
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="supported_providers.html">
|
||||
Supported Providers & Configuration
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
|
||||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
<li><a :data-current="activeSection === '#core-capabilities'" class="reference internal" href="#core-capabilities">Core Capabilities</a></li>
|
||||
<li><a :data-current="activeSection === '#key-benefits'" class="reference internal" href="#key-benefits">Key Benefits</a></li>
|
||||
<li><a :data-current="activeSection === '#common-use-cases'" class="reference internal" href="#common-use-cases">Common Use Cases</a></li>
|
||||
<li><a :data-current="activeSection === '#advanced-features'" class="reference internal" href="#advanced-features">Advanced Features</a></li>
|
||||
<li><a :data-current="activeSection === '#getting-started'" class="reference internal" href="#getting-started">Getting Started</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</aside>
|
||||
</main>
|
||||
</div>
|
||||
</div><footer class="py-6 border-t border-border md:py-0">
|
||||
<div class="container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row">
|
||||
<div class="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
|
||||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2025, Katanemo Labs, Inc. Last updated: Sep 19, 2025.</p>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
<script src="../../_static/documentation_options.js?v=a9d256b5"></script>
|
||||
<script src="../../_static/doctools.js?v=9bcbadda"></script>
|
||||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script defer="defer" src="../../_static/theme.js?v=073f68d9"></script>
|
||||
<script src="../../_static/design-tabs.js?v=f930bc37"></script>
|
||||
</body>
|
||||
</html>
|
||||
448
concepts/llm_providers/model_aliases.html
Executable file
448
concepts/llm_providers/model_aliases.html
Executable file
|
|
@ -0,0 +1,448 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' && window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val => localStorage.setItem('darkMode', val))">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
|
||||
<meta charset="utf-8"/>
|
||||
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
|
||||
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||||
<title>Model Aliases | Arch Docs v0.3.12</title>
|
||||
<meta content="Model Aliases | Arch Docs v0.3.12" property="og:title"/>
|
||||
<meta content="Model Aliases | Arch Docs v0.3.12" name="twitter:title"/>
|
||||
<link href="../../_static/pygments.css?v=466e7b45" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/theme.css?v=42baaae4" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/awesome-sphinx-design.css?v=15e0fffa" rel="stylesheet" type="text/css"/>
|
||||
<link href="./docs/concepts/llm_providers/model_aliases.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="../prompt_target.html" rel="next" title="Prompt Target"/>
|
||||
<link href="client_libraries.html" rel="prev" title="Client Libraries"/>
|
||||
<script>
|
||||
// Prevent a flash of the wrong theme. This inline script runs in the document head,
// so the 'dark' class is applied before the body is parsed.
// An explicit stored choice ('dark' or 'light') always wins. The OS colour scheme is
// consulted only when nothing is stored yet or the stored value is 'system', which is
// the same rule the :class binding on the html element applies later.
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
let mode;
|
||||
if (userPreference === 'dark' || ((!userPreference || userPreference === 'system') && window.matchMedia('(prefers-color-scheme: dark)').matches)) {
|
||||
mode = 'dark';
|
||||
document.documentElement.classList.add('dark');
|
||||
} else {
|
||||
mode = 'light';
|
||||
}
|
||||
// First visit: persist the resolved mode so later pages and the theme toggle read one value.
if (!userPreference) {localStorage.setItem('darkMode', mode)}
|
||||
</script>
|
||||
</head>
|
||||
<body :class="{ 'overflow-hidden': showSidebar }" class="min-h-screen font-sans antialiased bg-background text-foreground" x-data="{ showSidebar: false, showScrollTop: false }">
|
||||
<div @click.self="showSidebar = false" class="fixed inset-0 z-50 overflow-hidden bg-background/80 backdrop-blur-sm md:hidden" x-cloak="" x-show="showSidebar"></div><div class="relative flex flex-col min-h-screen" id="page"><a class="absolute top-0 left-0 z-[100] block bg-background p-4 text-xl transition -translate-x-full opacity-0 focus:translate-x-0 focus:opacity-100" href="#content">
|
||||
Skip to content
|
||||
</a><header class="sticky top-0 z-40 w-full border-b shadow-sm border-border supports-backdrop-blur:bg-background/60 bg-background/95 backdrop-blur"><div class="container flex items-center h-14">
|
||||
<div class="hidden mr-4 md:flex">
|
||||
<a class="flex items-center mr-6" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
|
||||
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
|
||||
</svg>
|
||||
<span class="sr-only">Toggle navigation menu</span>
|
||||
</button>
|
||||
<div class="flex items-center justify-between flex-1 space-x-2 sm:space-x-4 md:justify-end">
|
||||
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../../search.html" class="relative flex items-center group" id="searchbox" method="get">
|
||||
<input aria-label="Search the docs" class="inline-flex items-center font-medium transition-colors bg-transparent focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 ring-offset-background border border-input hover:bg-accent focus:bg-accent hover:text-accent-foreground focus:text-accent-foreground hover:placeholder-accent-foreground py-2 px-4 relative h-9 w-full justify-start rounded-[0.5rem] text-sm text-muted-foreground sm:pr-12 md:w-40 lg:w-64" id="search-input" name="q" placeholder="Search ..." type="search" x-ref="search"/>
|
||||
<kbd class="pointer-events-none absolute right-1.5 top-2 hidden h-5 select-none text-muted-foreground items-center gap-1 rounded border border-border bg-muted px-1.5 font-mono text-[10px] font-medium opacity-100 sm:flex group-hover:bg-accent group-hover:text-accent-foreground">
|
||||
<span class="text-xs">⌘</span>
|
||||
K
|
||||
</kbd>
|
||||
</form>
|
||||
</div>
|
||||
<nav class="flex items-center space-x-1">
|
||||
<!-- Icon-only link to the GitHub repository. The accessible name comes from aria-label
     (title alone is not reliably announced); the SVG is decorative and hidden from AT. -->
<a aria-label="Visit repository on GitHub" href="https://github.com/katanemo/arch" rel="noopener nofollow" title="Visit repository on GitHub">
  <div class="inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md disabled:opacity-50 disabled:pointer-events-none hover:bg-accent hover:text-accent-foreground h-9 w-9">
    <svg aria-hidden="true" fill="currentColor" focusable="false" height="26px" style="margin-top:-2px;display:inline" viewbox="0 0 45 44" xmlns="http://www.w3.org/2000/svg"><path clip-rule="evenodd" d="M22.477.927C10.485.927.76 10.65.76 22.647c0 9.596 6.223 17.736 14.853 20.608 1.087.2 1.483-.47 1.483-1.047 0-.516-.019-1.881-.03-3.693-6.04 1.312-7.315-2.912-7.315-2.912-.988-2.51-2.412-3.178-2.412-3.178-1.972-1.346.149-1.32.149-1.32 2.18.154 3.327 2.24 3.327 2.24 1.937 3.318 5.084 2.36 6.321 1.803.197-1.403.759-2.36 1.379-2.903-4.823-.548-9.894-2.412-9.894-10.734 0-2.37.847-4.31 2.236-5.828-.224-.55-.969-2.759.214-5.748 0 0 1.822-.584 5.972 2.226 1.732-.482 3.59-.722 5.437-.732 1.845.01 3.703.25 5.437.732 4.147-2.81 5.967-2.226 5.967-2.226 1.185 2.99.44 5.198.217 5.748 1.392 1.517 2.232 3.457 2.232 5.828 0 8.344-5.078 10.18-9.916 10.717.779.67 1.474 1.996 1.474 4.021 0 2.904-.027 5.247-.027 5.96 0 .58.392 1.256 1.493 1.044C37.981 40.375 44.2 32.24 44.2 22.647c0-11.996-9.726-21.72-21.722-21.72" fill="currentColor" fill-rule="evenodd"></path></svg>
  </div>
</a>
|
||||
<button @click="darkMode = darkMode === 'light' ? 'dark' : 'light'" aria-label="Color theme switcher" class="relative inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md hover:bg-accent hover:text-accent-foreground h-9 w-9" type="button">
|
||||
<svg class="absolute transition-all scale-100 rotate-0 dark:-rotate-90 dark:scale-0" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 685q45.456 0 77.228-31.772Q589 621.456 589 576q0-45.456-31.772-77.228Q525.456 467 480 467q-45.456 0-77.228 31.772Q371 530.544 371 576q0 45.456 31.772 77.228Q434.544 685 480 685Zm0 91q-83 0-141.5-58.5T280 576q0-83 58.5-141.5T480 376q83 0 141.5 58.5T680 576q0 83-58.5 141.5T480 776ZM80 621.5q-19.152 0-32.326-13.174T34.5 576q0-19.152 13.174-32.326T80 530.5h80q19.152 0 32.326 13.174T205.5 576q0 19.152-13.174 32.326T160 621.5H80Zm720 0q-19.152 0-32.326-13.174T754.5 576q0-19.152 13.174-32.326T800 530.5h80q19.152 0 32.326 13.174T925.5 576q0 19.152-13.174 32.326T880 621.5h-80Zm-320-320q-19.152 0-32.326-13.174T434.5 256v-80q0-19.152 13.174-32.326T480 130.5q19.152 0 32.326 13.174T525.5 176v80q0 19.152-13.174 32.326T480 301.5Zm0 720q-19.152 0-32.326-13.17Q434.5 995.152 434.5 976v-80q0-19.152 13.174-32.326T480 850.5q19.152 0 32.326 13.174T525.5 896v80q0 19.152-13.174 32.33-13.174 13.17-32.326 13.17ZM222.174 382.065l-43-42Q165.5 327.391 166 308.239t13.174-33.065q13.435-13.674 32.587-13.674t32.065 13.674l42.239 43q12.674 13.435 12.555 31.706-.12 18.272-12.555 31.946-12.674 13.674-31.445 13.413-18.772-.261-32.446-13.174Zm494 494.761-42.239-43q-12.674-13.435-12.674-32.087t12.674-31.565Q686.609 756.5 705.38 757q18.772.5 32.446 13.174l43 41.761Q794.5 824.609 794 843.761t-13.174 33.065Q767.391 890.5 748.239 890.5t-32.065-13.674Zm-42-494.761Q660.5 369.391 661 350.62q.5-18.772 13.174-32.446l41.761-43Q728.609 261.5 747.761 262t33.065 13.174q13.674 13.435 13.674 32.587t-13.674 32.065l-43 42.239q-13.435 12.674-31.706 12.555-18.272-.12-31.946-12.555Zm-495 494.761Q165.5 863.391 165.5 844.239t13.674-32.065l43-42.239q13.435-12.674 32.087-12.674t31.565 12.674Q299.5 782.609 299 801.38q-.5 18.772-13.174 32.446l-41.761 43Q231.391 890.5 212.239 890t-33.065-13.174ZM480 576Z"></path>
|
||||
</svg>
|
||||
<svg class="absolute transition-all scale-0 rotate-90 dark:rotate-0 dark:scale-100" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 936q-151 0-255.5-104.5T120 576q0-138 90-239.5T440 218q25-3 39 18t-1 44q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q21-14 43-1.5t19 39.5q-14 138-117.5 229T480 936Zm0-80q88 0 158-48.5T740 681q-20 5-40 8t-40 3q-123 0-209.5-86.5T364 396q0-20 3-40t8-40q-78 32-126.5 102T200 576q0 116 82 198t198 82Zm-10-270Z"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
<div class="flex-1"><div class="container flex-1 items-start md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full shrink-0 overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
|
||||
<a class="!justify-start text-sm md:!hidden bg-background" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a>
|
||||
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0 h-[calc(100vh-8rem)] md:h-auto">
|
||||
<div class="overflow-y-auto h-full w-full relative pr-6">
|
||||
|
||||
<!-- Google Analytics (gtag.js) for measurement ID G-K2LXXSX6HB: async loader followed by the inline bootstrap. -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-K2LXXSX6HB"></script>
<script>
  // Reuse an existing dataLayer array if one is already defined, otherwise start an empty one.
  window.dataLayer = window.dataLayer || [];
  // gtag() pushes its raw `arguments` object onto dataLayer.
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());

  gtag('config', 'G-K2LXXSX6HB');
</script>
|
||||
<nav class="table w-full min-w-full my-6 lg:my-8">
|
||||
<p class="caption" role="heading"><span class="caption-text">Get Started</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/overview.html">Overview</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/intro_to_arch.html">Intro to Arch</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html">Quickstart</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html#next-steps">Next Steps</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Concepts</span></p>
|
||||
<ul class="current">
|
||||
<!-- Collapsible "Tech Overview" group. Alpine's `expanded` flag starts true only when this <li>
     carries the "current" class; the button toggles the nested list without following the link. -->
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../tech_overview/tech_overview.html">Tech Overview<button :aria-expanded="expanded" @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only">Toggle submenu</span><svg aria-hidden="true" fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
|
||||
<!-- Collapsible "LLM Providers" group. It carries the "current" class, so Alpine's `expanded`
     flag starts true; the entry for the page being viewed is marked with aria-current="page". -->
<li class="toctree-l1 current" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="llm_providers.html">LLM Providers<button :aria-expanded="expanded" @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only">Toggle submenu</span><svg aria-hidden="true" fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul class="current" x-show="expanded">
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html">Supported Providers &amp; Configuration</a></li>
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html">Client Libraries</a></li>
<li class="toctree-l2 current"><a aria-current="page" class="current reference internal" href="#">Model Aliases</a></li>
</ul>
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/prompt_guard.html">Prompt Guard</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/agent_routing.html">Agent Routing and Hand Off</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/function_calling.html">Function Calling</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/llm_router.html">LLM Routing</a></li>
|
||||
<!-- Collapsible "Observability" group. Alpine's `expanded` flag starts true only when this <li>
     carries the "current" class; the button toggles the nested list without following the link. -->
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../guides/observability/observability.html">Observability<button :aria-expanded="expanded" @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only">Toggle submenu</span><svg aria-hidden="true" fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/tracing.html">Tracing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/monitoring.html">Monitoring</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/access_logging.html">Access Logging</a></li>
</ul>
</li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Build with Arch</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/agent.html">Agentic Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/rag.html">RAG Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/multi_turn.html">Multi-Turn</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
<!-- Close control for the sidebar, hidden from the md breakpoint up ("md:hidden").
     Clicking sets showSidebar to false. Icon-only, so the name comes from aria-label. -->
<button @click="showSidebar = false" aria-label="Close navigation menu" class="absolute md:hidden right-4 top-4 rounded-sm opacity-70 transition-opacity hover:opacity-100" type="button">
  <svg aria-hidden="true" class="h-4 w-4" fill="currentColor" height="24" stroke="none" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
    <path d="M480 632 284 828q-11 11-28 11t-28-11q-11-11-11-28t11-28l196-196-196-196q-11-11-11-28t11-28q11-11 28-11t28 11l196 196 196-196q11-11 28-11t28 11q11 11 11 28t-11 28L536 576l196 196q11 11 11 28t-11 28q-11 11-28 11t-28-11L480 632Z"></path>
  </svg>
</button>
|
||||
</aside>
|
||||
<main class="relative py-6 lg:gap-10 lg:py-8 xl:grid xl:grid-cols-[1fr_300px]">
|
||||
<div class="w-full min-w-0 mx-auto">
|
||||
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
|
||||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../../index.html">
|
||||
<span class="hidden md:inline">Arch Docs v0.3.12</span>
|
||||
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
|
||||
</svg>
|
||||
</a>
|
||||
<div class="mr-1">/</div><a class="hover:text-foreground overflow-hidden text-ellipsis whitespace-nowrap" href="llm_providers.html">LLM Providers</a>
|
||||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Model Aliases</span>
|
||||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="model-aliases">
|
||||
<span id="id1"></span><h1>Model Aliases<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-aliases"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>Model aliases provide semantic, version-controlled names for your models, enabling cleaner client code, easier model management, and advanced routing capabilities. Instead of using provider-specific model names like <code class="docutils literal notranslate"><span class="pre">gpt-4o-mini</span></code> or <code class="docutils literal notranslate"><span class="pre">claude-3-5-sonnet-20241022</span></code>, you can create meaningful aliases like <code class="docutils literal notranslate"><span class="pre">fast-model</span></code> or <code class="docutils literal notranslate"><span class="pre">arch.summarize.v1</span></code>.</p>
|
||||
<p><strong>Benefits of Model Aliases:</strong></p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>Semantic Naming</strong>: Use descriptive names that reflect the model’s purpose</p></li>
|
||||
<li><p><strong>Version Control</strong>: Implement versioning schemes (e.g., <code class="docutils literal notranslate"><span class="pre">v1</span></code>, <code class="docutils literal notranslate"><span class="pre">v2</span></code>) for model upgrades</p></li>
|
||||
<li><p><strong>Environment Management</strong>: Different aliases can point to different models across environments</p></li>
|
||||
<li><p><strong>Client Simplification</strong>: Clients use consistent, meaningful names regardless of underlying provider</p></li>
|
||||
<li><p><strong>Advanced Routing (Coming Soon)</strong>: Enable guardrails, fallbacks, and traffic splitting at the alias level</p></li>
|
||||
</ul>
|
||||
<section id="basic-configuration">
|
||||
<h2>Basic Configuration<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#basic-configuration" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#basic-configuration'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>Simple Alias Mapping</strong></p>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Basic Model Aliases</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<!-- YAML sample: whitespace inside <pre> is significant. The "w" spans carry the indentation
     that nests access_key/base_url under each list item and target under each alias. -->
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-2"><span class="w">  </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
</span><span id="line-3"><span class="w">    </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-4">
</span><span id="line-5"><span class="w">  </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
</span><span id="line-6"><span class="w">    </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-7">
</span><span id="line-8"><span class="w">  </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
</span><span id="line-9"><span class="w">    </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
</span><span id="line-10">
</span><span id="line-11"><span class="w">  </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">ollama/llama3.1</span>
</span><span id="line-12"><span class="w">    </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:11434</span>
</span><span id="line-13">
</span><span id="line-14"><span class="c1"># Define aliases that map to the models above</span>
</span><span id="line-15"><span class="nt">model_aliases</span><span class="p">:</span>
</span><span id="line-16"><span class="w">  </span><span class="c1"># Semantic versioning approach</span>
</span><span id="line-17"><span class="w">  </span><span class="nt">arch.summarize.v1</span><span class="p">:</span>
</span><span id="line-18"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
</span><span id="line-19">
</span><span id="line-20"><span class="w">  </span><span class="nt">arch.reasoning.v1</span><span class="p">:</span>
</span><span id="line-21"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-22">
</span><span id="line-23"><span class="w">  </span><span class="nt">arch.creative.v1</span><span class="p">:</span>
</span><span id="line-24"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
</span><span id="line-25">
</span><span id="line-26"><span class="w">  </span><span class="c1"># Functional aliases</span>
</span><span id="line-27"><span class="w">  </span><span class="nt">fast-model</span><span class="p">:</span>
</span><span id="line-28"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
</span><span id="line-29">
</span><span id="line-30"><span class="w">  </span><span class="nt">smart-model</span><span class="p">:</span>
</span><span id="line-31"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-32">
</span><span id="line-33"><span class="w">  </span><span class="nt">creative-model</span><span class="p">:</span>
</span><span id="line-34"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
</span><span id="line-35">
</span><span id="line-36"><span class="w">  </span><span class="c1"># Local model alias</span>
</span><span id="line-37"><span class="w">  </span><span class="nt">local-chat</span><span class="p">:</span>
</span><span id="line-38"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">llama3.1</span>
</span></code></pre></div>
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="using-aliases">
|
||||
<h2>Using Aliases<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#using-aliases" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#using-aliases'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>Client Code Examples</strong></p>
|
||||
<p>Once aliases are configured, clients can use semantic names instead of provider-specific model names:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id3">
|
||||
<div class="code-block-caption"><span class="caption-text">Python Client Usage</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<!-- Python sample: whitespace inside <pre> is significant; the keyword arguments of each
     create(...) call are indented four spaces. -->
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">from</span><span class="w"> </span><span class="nn">openai</span><span class="w"> </span><span class="kn">import</span> <span class="n">OpenAI</span>
</span><span id="line-2">
</span><span id="line-3"><span class="n">client</span> <span class="o">=</span> <span class="n">OpenAI</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000/"</span><span class="p">)</span>
</span><span id="line-4">
</span><span id="line-5"><span class="c1"># Use semantic alias instead of provider model name</span>
</span><span id="line-6"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
</span><span id="line-7">    <span class="n">model</span><span class="o">=</span><span class="s2">"arch.summarize.v1"</span><span class="p">,</span> <span class="c1"># Points to gpt-4o-mini</span>
</span><span id="line-8">    <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Summarize this document..."</span><span class="p">}]</span>
</span><span id="line-9"><span class="p">)</span>
</span><span id="line-10">
</span><span id="line-11"><span class="c1"># Switch to a different capability</span>
</span><span id="line-12"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
</span><span id="line-13">    <span class="n">model</span><span class="o">=</span><span class="s2">"arch.reasoning.v1"</span><span class="p">,</span> <span class="c1"># Points to gpt-4o</span>
</span><span id="line-14">    <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Solve this complex problem..."</span><span class="p">}]</span>
</span><span id="line-15"><span class="p">)</span>
</span></code></pre></div>
</div>
|
||||
</div>
|
||||
<div class="literal-block-wrapper docutils container" id="id4">
|
||||
<div class="code-block-caption"><span class="caption-text">cURL Example</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<!-- cURL sample: whitespace inside <pre> is significant; continuation lines are indented two
     spaces and the JSON body members four. -->
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><code><span id="line-1">curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://127.0.0.1:12000/v1/chat/completions<span class="w"> </span><span class="se">\</span>
</span><span id="line-2"><span class="w">  </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
</span><span id="line-3"><span class="w">  </span>-d<span class="w"> </span><span class="s1">'{</span>
</span><span id="line-4"><span class="s1">    "model": "fast-model",</span>
</span><span id="line-5"><span class="s1">    "messages": [{"role": "user", "content": "Hello!"}]</span>
</span><span id="line-6"><span class="s1">  }'</span>
</span></code></pre></div>
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="naming-best-practices">
|
||||
<h2>Naming Best Practices<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#naming-best-practices" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#naming-best-practices'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>Semantic Versioning</strong></p>
|
||||
<p>Use version numbers for backward compatibility and gradual model upgrades:</p>
|
||||
<!-- YAML sample: whitespace inside <pre> is significant; each alias sits two spaces under
     model_aliases and its target four. -->
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
</span><span id="line-2"><span class="w">  </span><span class="c1"># Current production version</span>
</span><span id="line-3"><span class="w">  </span><span class="nt">arch.summarize.v1</span><span class="p">:</span>
</span><span id="line-4"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
</span><span id="line-5">
</span><span id="line-6"><span class="w">  </span><span class="c1"># Beta version for testing</span>
</span><span id="line-7"><span class="w">  </span><span class="nt">arch.summarize.v2</span><span class="p">:</span>
</span><span id="line-8"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-9">
</span><span id="line-10"><span class="w">  </span><span class="c1"># Stable alias that always points to latest</span>
</span><span id="line-11"><span class="w">  </span><span class="nt">arch.summarize.latest</span><span class="p">:</span>
</span><span id="line-12"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
</span></code></pre></div>
</div>
|
||||
<p><strong>Purpose-Based Naming</strong></p>
|
||||
<p>Create aliases that reflect the intended use case:</p>
|
||||
<!-- YAML sample: whitespace inside <pre> is significant; each alias sits two spaces under
     model_aliases and its target four. -->
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
</span><span id="line-2"><span class="w">  </span><span class="c1"># Task-specific</span>
</span><span id="line-3"><span class="w">  </span><span class="nt">code-reviewer</span><span class="p">:</span>
</span><span id="line-4"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-5">
</span><span id="line-6"><span class="w">  </span><span class="nt">document-summarizer</span><span class="p">:</span>
</span><span id="line-7"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
</span><span id="line-8">
</span><span id="line-9"><span class="w">  </span><span class="nt">creative-writer</span><span class="p">:</span>
</span><span id="line-10"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
</span><span id="line-11">
</span><span id="line-12"><span class="w">  </span><span class="nt">data-analyst</span><span class="p">:</span>
</span><span id="line-13"><span class="w">    </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span></code></pre></div>
</div>
|
||||
<p><strong>Environment-Specific Aliases</strong></p>
|
||||
<p>Different environments can use different underlying models:</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Development environment - use faster/cheaper models</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">dev.chat.v1</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="w"> </span><span class="c1"># Production environment - use more capable models</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">prod.chat.v1</span><span class="p">:</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-9">
|
||||
</span><span id="line-10"><span class="w"> </span><span class="c1"># Staging environment - test new models</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">staging.chat.v1</span><span class="p">:</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="advanced-features-coming-soon">
|
||||
<h2>Advanced Features (Coming Soon)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#advanced-features-coming-soon" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#advanced-features-coming-soon'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>The following features are planned for future releases of model aliases:</p>
|
||||
<p><strong>Guardrails Integration</strong></p>
|
||||
<p>Apply safety, cost, or latency rules at the alias level:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id5">
|
||||
<div class="code-block-caption"><span class="caption-text">Future Feature - Guardrails</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id5"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">arch.reasoning.v1</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-oss-120b</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">guardrails</span><span class="p">:</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">max_latency</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">5s</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">max_cost_per_request</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.10</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">block_categories</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"jailbreak"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"PII"</span><span class="p p-Indicator">]</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">content_filters</span><span class="p">:</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"profanity"</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"sensitive_data"</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<p><strong>Fallback Chains</strong></p>
|
||||
<p>Provide a chain of models if the primary target fails or hits quota limits:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id6">
|
||||
<div class="code-block-caption"><span class="caption-text">Future Feature - Fallbacks</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id6"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">arch.summarize.v1</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">fallbacks</span><span class="p">:</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">llama3.1</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">conditions</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"quota_exceeded"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"timeout"</span><span class="p p-Indicator">]</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-haiku-20240307</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">conditions</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"primary_and_first_fallback_failed"</span><span class="p p-Indicator">]</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<p><strong>Traffic Splitting &amp; Canary Deployments</strong></p>
|
||||
<p>Distribute traffic across multiple models for A/B testing or gradual rollouts:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id7">
|
||||
<div class="code-block-caption"><span class="caption-text">Future Feature - Traffic Splitting</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id7"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">arch.v1</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">targets</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">llama3.1</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">weight</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">80</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">weight</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">20</span>
|
||||
</span><span id="line-8">
|
||||
</span><span id="line-9"><span class="w"> </span><span class="c1"># Canary deployment</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">arch.experimental.v1</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">targets</span><span class="p">:</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span><span class="w"> </span><span class="c1"># Current stable</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">weight</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">95</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">o1-preview</span><span class="w"> </span><span class="c1"># New model being tested</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="nt">weight</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">5</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<p><strong>Load Balancing</strong></p>
|
||||
<p>Distribute requests across multiple instances of the same model:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id8">
|
||||
<div class="code-block-caption"><span class="caption-text">Future Feature - Load Balancing</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id8"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">high-throughput-chat</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">load_balance</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">algorithm</span><span class="p">:</span><span class="w"> </span><span class="s">"round_robin"</span><span class="w"> </span><span class="c1"># or "least_connections", "weighted"</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">targets</span><span class="p">:</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"https://api-1.example.com"</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"https://api-2.example.com"</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"https://api-3.example.com"</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="validation-rules">
|
||||
<h2>Validation Rules<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#validation-rules" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#validation-rules'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<ul class="simple">
|
||||
<li><p>Alias names must be valid identifiers (alphanumeric, dots, hyphens, underscores)</p></li>
|
||||
<li><p>Target models must be defined in the <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> section</p></li>
|
||||
<li><p>Circular references between aliases are not allowed</p></li>
|
||||
<li><p>Weights in traffic splitting must sum to 100</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="see-also">
|
||||
<h2>See Also<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#see-also" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#see-also'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<ul class="simple">
|
||||
<li><p><a class="reference internal" href="llm_providers.html#llm-providers"><span class="std std-ref">LLM Providers</span></a> - Learn about configuring LLM providers</p></li>
|
||||
<li><p><a class="reference internal" href="../../guides/llm_router.html#llm-router"><span class="std std-ref">LLM Routing</span></a> - Understand how aliases work with intelligent routing</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="client_libraries.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Client Libraries
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="../prompt_target.html">
|
||||
Prompt Target
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
|
||||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
<li><a :data-current="activeSection === '#basic-configuration'" class="reference internal" href="#basic-configuration">Basic Configuration</a></li>
|
||||
<li><a :data-current="activeSection === '#using-aliases'" class="reference internal" href="#using-aliases">Using Aliases</a></li>
|
||||
<li><a :data-current="activeSection === '#naming-best-practices'" class="reference internal" href="#naming-best-practices">Naming Best Practices</a></li>
|
||||
<li><a :data-current="activeSection === '#advanced-features-coming-soon'" class="reference internal" href="#advanced-features-coming-soon">Advanced Features (Coming Soon)</a></li>
|
||||
<li><a :data-current="activeSection === '#validation-rules'" class="reference internal" href="#validation-rules">Validation Rules</a></li>
|
||||
<li><a :data-current="activeSection === '#see-also'" class="reference internal" href="#see-also">See Also</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</aside>
|
||||
</main>
|
||||
</div>
|
||||
</div><footer class="py-6 border-t border-border md:py-0">
|
||||
<div class="container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row">
|
||||
<div class="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
|
||||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2025, Katanemo Labs, Inc. Last updated: Sep 19, 2025. </p>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
<script src="../../_static/documentation_options.js?v=a9d256b5"></script>
|
||||
<script src="../../_static/doctools.js?v=9bcbadda"></script>
|
||||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script defer="defer" src="../../_static/theme.js?v=073f68d9"></script>
|
||||
<script src="../../_static/design-tabs.js?v=f930bc37"></script>
|
||||
</body>
|
||||
</html>
|
||||
801
concepts/llm_providers/supported_providers.html
Executable file
801
concepts/llm_providers/supported_providers.html
Executable file
|
|
@ -0,0 +1,801 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' && window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val => localStorage.setItem('darkMode', val))">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
|
||||
<meta charset="utf-8"/>
|
||||
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
|
||||
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport"/>
|
||||
<title>Supported Providers &amp; Configuration | Arch Docs v0.3.12</title>
|
||||
<meta content="Supported Providers &amp; Configuration | Arch Docs v0.3.12" property="og:title"/>
|
||||
<meta content="Supported Providers &amp; Configuration | Arch Docs v0.3.12" name="twitter:title"/>
|
||||
<link href="../../_static/pygments.css?v=466e7b45" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/theme.css?v=42baaae4" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/awesome-sphinx-design.css?v=15e0fffa" rel="stylesheet" type="text/css"/>
|
||||
<link href="./docs/concepts/llm_providers/supported_providers.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="client_libraries.html" rel="next" title="Client Libraries"/>
|
||||
<link href="llm_providers.html" rel="prev" title="LLM Providers"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
// Stored theme choice; null when nothing has been saved yet.
const userPreference = localStorage.getItem('darkMode');

let mode;

// Resolve the theme the same way the `:class` binding on <html> does:
// an explicit 'dark' always wins, and the OS colour scheme is consulted only
// when there is no explicit choice (nothing stored, or 'system'). Previously
// the OS setting was OR-ed in unconditionally, so a stored 'light' choice was
// overridden and the page was first painted dark on dark-mode systems.
if (
  userPreference === 'dark' ||
  ((!userPreference || userPreference === 'system') &&
    window.matchMedia('(prefers-color-scheme: dark)').matches)
) {
  mode = 'dark';
  document.documentElement.classList.add('dark');
} else {
  mode = 'light';
}

// Nothing stored yet: persist the resolved mode so later loads have a value.
if (!userPreference) {
  localStorage.setItem('darkMode', mode);
}
|
||||
</script>
|
||||
</head>
|
||||
<body :class="{ 'overflow-hidden': showSidebar }" class="min-h-screen font-sans antialiased bg-background text-foreground" x-data="{ showSidebar: false, showScrollTop: false }">
|
||||
<div @click.self="showSidebar = false" class="fixed inset-0 z-50 overflow-hidden bg-background/80 backdrop-blur-sm md:hidden" x-cloak="" x-show="showSidebar"></div><div class="relative flex flex-col min-h-screen" id="page"><a class="absolute top-0 left-0 z-[100] block bg-background p-4 text-xl transition -translate-x-full opacity-0 focus:translate-x-0 focus:opacity-100" href="#content">
|
||||
Skip to content
|
||||
</a><header class="sticky top-0 z-40 w-full border-b shadow-sm border-border supports-backdrop-blur:bg-background/60 bg-background/95 backdrop-blur"><div class="container flex items-center h-14">
|
||||
<div class="hidden mr-4 md:flex">
|
||||
<a class="flex items-center mr-6" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
|
||||
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
|
||||
</svg>
|
||||
<span class="sr-only">Toggle navigation menu</span>
|
||||
</button>
|
||||
<div class="flex items-center justify-between flex-1 space-x-2 sm:space-x-4 md:justify-end">
|
||||
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../../search.html" class="relative flex items-center group" id="searchbox" method="get">
|
||||
<input aria-label="Search the docs" class="inline-flex items-center font-medium transition-colors bg-transparent focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 ring-offset-background border border-input hover:bg-accent focus:bg-accent hover:text-accent-foreground focus:text-accent-foreground hover:placeholder-accent-foreground py-2 px-4 relative h-9 w-full justify-start rounded-[0.5rem] text-sm text-muted-foreground sm:pr-12 md:w-40 lg:w-64" id="search-input" name="q" placeholder="Search ..." type="search" x-ref="search"/>
|
||||
<kbd class="pointer-events-none absolute right-1.5 top-2 hidden h-5 select-none text-muted-foreground items-center gap-1 rounded border border-border bg-muted px-1.5 font-mono text-[10px] font-medium opacity-100 sm:flex group-hover:bg-accent group-hover:text-accent-foreground">
|
||||
<span class="text-xs">⌘</span>
|
||||
K
|
||||
</kbd>
|
||||
</form>
|
||||
</div>
|
||||
<nav class="flex items-center space-x-1">
|
||||
<a href="https://github.com/katanemo/arch" rel="noopener nofollow" title="Visit repository on GitHub">
|
||||
<div class="inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md disabled:opacity-50 disabled:pointer-events-none hover:bg-accent hover:text-accent-foreground h-9 w-9">
|
||||
<svg fill="currentColor" height="26px" style="margin-top:-2px;display:inline" viewbox="0 0 45 44" xmlns="http://www.w3.org/2000/svg"><path clip-rule="evenodd" d="M22.477.927C10.485.927.76 10.65.76 22.647c0 9.596 6.223 17.736 14.853 20.608 1.087.2 1.483-.47 1.483-1.047 0-.516-.019-1.881-.03-3.693-6.04 1.312-7.315-2.912-7.315-2.912-.988-2.51-2.412-3.178-2.412-3.178-1.972-1.346.149-1.32.149-1.32 2.18.154 3.327 2.24 3.327 2.24 1.937 3.318 5.084 2.36 6.321 1.803.197-1.403.759-2.36 1.379-2.903-4.823-.548-9.894-2.412-9.894-10.734 0-2.37.847-4.31 2.236-5.828-.224-.55-.969-2.759.214-5.748 0 0 1.822-.584 5.972 2.226 1.732-.482 3.59-.722 5.437-.732 1.845.01 3.703.25 5.437.732 4.147-2.81 5.967-2.226 5.967-2.226 1.185 2.99.44 5.198.217 5.748 1.392 1.517 2.232 3.457 2.232 5.828 0 8.344-5.078 10.18-9.916 10.717.779.67 1.474 1.996 1.474 4.021 0 2.904-.027 5.247-.027 5.96 0 .58.392 1.256 1.493 1.044C37.981 40.375 44.2 32.24 44.2 22.647c0-11.996-9.726-21.72-21.722-21.72" fill="currentColor" fill-rule="evenodd"></path></svg>
|
||||
</div>
|
||||
</a>
|
||||
<button @click="darkMode = darkMode === 'light' ? 'dark' : 'light'" aria-label="Color theme switcher" class="relative inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md hover:bg-accent hover:text-accent-foreground h-9 w-9" type="button">
|
||||
<svg class="absolute transition-all scale-100 rotate-0 dark:-rotate-90 dark:scale-0" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 685q45.456 0 77.228-31.772Q589 621.456 589 576q0-45.456-31.772-77.228Q525.456 467 480 467q-45.456 0-77.228 31.772Q371 530.544 371 576q0 45.456 31.772 77.228Q434.544 685 480 685Zm0 91q-83 0-141.5-58.5T280 576q0-83 58.5-141.5T480 376q83 0 141.5 58.5T680 576q0 83-58.5 141.5T480 776ZM80 621.5q-19.152 0-32.326-13.174T34.5 576q0-19.152 13.174-32.326T80 530.5h80q19.152 0 32.326 13.174T205.5 576q0 19.152-13.174 32.326T160 621.5H80Zm720 0q-19.152 0-32.326-13.174T754.5 576q0-19.152 13.174-32.326T800 530.5h80q19.152 0 32.326 13.174T925.5 576q0 19.152-13.174 32.326T880 621.5h-80Zm-320-320q-19.152 0-32.326-13.174T434.5 256v-80q0-19.152 13.174-32.326T480 130.5q19.152 0 32.326 13.174T525.5 176v80q0 19.152-13.174 32.326T480 301.5Zm0 720q-19.152 0-32.326-13.17Q434.5 995.152 434.5 976v-80q0-19.152 13.174-32.326T480 850.5q19.152 0 32.326 13.174T525.5 896v80q0 19.152-13.174 32.33-13.174 13.17-32.326 13.17ZM222.174 382.065l-43-42Q165.5 327.391 166 308.239t13.174-33.065q13.435-13.674 32.587-13.674t32.065 13.674l42.239 43q12.674 13.435 12.555 31.706-.12 18.272-12.555 31.946-12.674 13.674-31.445 13.413-18.772-.261-32.446-13.174Zm494 494.761-42.239-43q-12.674-13.435-12.674-32.087t12.674-31.565Q686.609 756.5 705.38 757q18.772.5 32.446 13.174l43 41.761Q794.5 824.609 794 843.761t-13.174 33.065Q767.391 890.5 748.239 890.5t-32.065-13.674Zm-42-494.761Q660.5 369.391 661 350.62q.5-18.772 13.174-32.446l41.761-43Q728.609 261.5 747.761 262t33.065 13.174q13.674 13.435 13.674 32.587t-13.674 32.065l-43 42.239q-13.435 12.674-31.706 12.555-18.272-.12-31.946-12.555Zm-495 494.761Q165.5 863.391 165.5 844.239t13.674-32.065l43-42.239q13.435-12.674 32.087-12.674t31.565 12.674Q299.5 782.609 299 801.38q-.5 18.772-13.174 32.446l-41.761 43Q231.391 890.5 212.239 890t-33.065-13.174ZM480 576Z"></path>
|
||||
</svg>
|
||||
<svg class="absolute transition-all scale-0 rotate-90 dark:rotate-0 dark:scale-100" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 936q-151 0-255.5-104.5T120 576q0-138 90-239.5T440 218q25-3 39 18t-1 44q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q21-14 43-1.5t19 39.5q-14 138-117.5 229T480 936Zm0-80q88 0 158-48.5T740 681q-20 5-40 8t-40 3q-123 0-209.5-86.5T364 396q0-20 3-40t8-40q-78 32-126.5 102T200 576q0 116 82 198t198 82Zm-10-270Z"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
<div class="flex-1"><div class="container flex-1 items-start md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full shrink-0 overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
|
||||
<a class="!justify-start text-sm md:!hidden bg-background" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.3.12</span>
|
||||
</a>
|
||||
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0 h-[calc(100vh-8rem)] md:h-auto">
|
||||
<div class="overflow-y-auto h-full w-full relative pr-6">
|
||||
|
||||
<script async="" src="https://www.googletagmanager.com/gtag/js?id=G-K2LXXSX6HB"></script>
|
||||
<script>
|
||||
window.dataLayer = window.dataLayer || [];
|
||||
// Append this call's `arguments` object to dataLayer as a single entry.
function gtag() {
  dataLayer.push(arguments);
}
|
||||
// Record a 'js' entry carrying the current Date.
gtag('js', new Date());
|
||||
|
||||
// Record a 'config' entry for ID G-K2LXXSX6HB (the same ID used in the gtag/js script URL above).
gtag('config', 'G-K2LXXSX6HB');
|
||||
</script>
|
||||
<nav class="table w-full min-w-full my-6 lg:my-8">
|
||||
<p class="caption" role="heading"><span class="caption-text">Get Started</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/overview.html">Overview</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/intro_to_arch.html">Intro to Arch</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html">Quickstart</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html#next-steps">Next Steps</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Concepts</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../tech_overview/tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1 current" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul class="current" x-show="expanded">
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Supported Providers &amp; Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/prompt_guard.html">Prompt Guard</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/agent_routing.html">Agent Routing and Hand Off</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/function_calling.html">Function Calling</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/llm_router.html">LLM Routing</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../guides/observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/monitoring.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/access_logging.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Build with Arch</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/agent.html">Agentic Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/rag.html">RAG Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/multi_turn.html">Multi-Turn</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
<button @click="showSidebar = false" aria-label="Close sidebar" class="absolute md:hidden right-4 top-4 rounded-sm opacity-70 transition-opacity hover:opacity-100" type="button">
|
||||
<svg class="h-4 w-4" fill="currentColor" height="24" stroke="none" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M480 632 284 828q-11 11-28 11t-28-11q-11-11-11-28t11-28l196-196-196-196q-11-11-11-28t11-28q11-11 28-11t28 11l196 196 196-196q11-11 28-11t28 11q11 11 11 28t-11 28L536 576l196 196q11 11 11 28t-11 28q-11 11-28 11t-28-11L480 632Z"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</aside>
|
||||
<main class="relative py-6 lg:gap-10 lg:py-8 xl:grid xl:grid-cols-[1fr_300px]">
|
||||
<div class="w-full min-w-0 mx-auto">
|
||||
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
|
||||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../../index.html">
|
||||
<span class="hidden md:inline">Arch Docs v0.3.12</span>
|
||||
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
|
||||
</svg>
|
||||
</a>
|
||||
<div class="mr-1">/</div><a class="hover:text-foreground overflow-hidden text-ellipsis whitespace-nowrap" href="llm_providers.html">LLM Providers</a>
|
||||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Supported Providers &amp; Configuration</span>
|
||||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="supported-providers-configuration">
|
||||
<span id="supported-providers"></span><h1>Supported Providers & Configuration<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#supported-providers-configuration"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>Arch provides first-class support for multiple LLM providers through native integrations and OpenAI-compatible interfaces. This comprehensive guide covers all supported providers, their available chat models, and detailed configuration instructions.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p><strong>Model Support:</strong> Arch supports all chat models from each provider, not just the examples shown in this guide. The configurations below demonstrate common models for reference, but you can use any chat model available from your chosen provider.</p>
|
||||
</div>
|
||||
<section id="configuration-structure">
|
||||
<h2>Configuration Structure<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#configuration-structure" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#configuration-structure'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>All providers are configured in the <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> section of your <code class="docutils literal notranslate"><span class="pre">arch_config.yaml</span></code> file:</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="nt">listeners</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">egress_traffic</span><span class="p">:</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">12000</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">30s</span>
|
||||
</span><span id="line-9">
|
||||
</span><span id="line-10"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="c1"># Provider configurations go here</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">provider/model-name</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$API_KEY</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="c1"># Additional provider-specific options</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Common Configuration Fields:</strong></p>
|
||||
<ul class="simple">
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">model</span></code>: Provider prefix and model name (format: <code class="docutils literal notranslate"><span class="pre">provider/model-name</span></code>)</p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">access_key</span></code>: API key for authentication (supports environment variables)</p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">default</span></code>: Mark a model as the default (optional, boolean)</p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">name</span></code>: Custom name for the provider instance (optional)</p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">base_url</span></code>: Custom endpoint URL (required for some providers)</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="provider-categories">
|
||||
<h2>Provider Categories<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#provider-categories" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#provider-categories'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>First-Class Providers:</strong>
|
||||
Native integrations with built-in support for provider-specific features and authentication.</p>
|
||||
<p><strong>OpenAI-Compatible Providers:</strong>
|
||||
Any provider that implements the OpenAI API interface can be configured using custom endpoints.</p>
|
||||
</section>
|
||||
<section id="supported-api-endpoints">
|
||||
<h2>Supported API Endpoints<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#supported-api-endpoints" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#supported-api-endpoints'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>Arch supports the following standardized endpoints across providers:</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 40.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Endpoint</p></th>
|
||||
<th class="head"><p>Purpose</p></th>
|
||||
<th class="head"><p>Supported Clients</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">/v1/chat/completions</span></code></p></td>
|
||||
<td><p>OpenAI-style chat completions</p></td>
|
||||
<td><p>OpenAI SDK, cURL, custom clients</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">/v1/messages</span></code></p></td>
|
||||
<td><p>Anthropic-style messages</p></td>
|
||||
<td><p>Anthropic SDK, cURL, custom clients</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
<section id="first-class-providers">
|
||||
<h2>First-Class Providers<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#first-class-providers" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#first-class-providers'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<section id="openai">
|
||||
<h3>OpenAI<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#openai" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#openai'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">openai/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/v1/chat/completions</span></code></p>
|
||||
<p><strong>Authentication:</strong> API Key - Get your OpenAI API key from <a class="reference external" href="https://platform.openai.com/api-keys" rel="nofollow noopener">OpenAI Platform<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All OpenAI chat models including GPT-5, GPT-4o, GPT-4, GPT-3.5-turbo, and all future releases.</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 20.0%"/>
|
||||
<col style="width: 50.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Model Name</p></th>
|
||||
<th class="head"><p>Model ID for Config</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p>GPT-5</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">openai/gpt-5</span></code></p></td>
|
||||
<td><p>Next-generation model (use any model name from OpenAI’s API)</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>GPT-4o</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">openai/gpt-4o</span></code></p></td>
|
||||
<td><p>Latest multimodal model</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>GPT-4o mini</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">openai/gpt-4o-mini</span></code></p></td>
|
||||
<td><p>Fast, cost-effective model</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>GPT-4</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">openai/gpt-4</span></code></p></td>
|
||||
<td><p>High-capability reasoning model</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>GPT-3.5 Turbo</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">openai/gpt-3.5-turbo</span></code></p></td>
|
||||
<td><p>Balanced performance and cost</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>o3-mini</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">openai/o3-mini</span></code></p></td>
|
||||
<td><p>Reasoning-focused model (preview)</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>o3</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">openai/o3</span></code></p></td>
|
||||
<td><p>Advanced reasoning model (preview)</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p><strong>Configuration Examples:</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Latest models (examples - use any OpenAI chat model)</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-6">
|
||||
</span><span id="line-7"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-9">
|
||||
</span><span id="line-10"><span class="w"> </span><span class="c1"># Use any model name from OpenAI's API</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-5</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="anthropic">
|
||||
<h3>Anthropic<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#anthropic" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#anthropic'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">anthropic/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/v1/messages</span></code></p>
|
||||
<p><strong>Authentication:</strong> API Key - Get your Anthropic API key from <a class="reference external" href="https://console.anthropic.com/settings/keys" rel="nofollow noopener">Anthropic Console<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All Anthropic Claude models including Claude Sonnet 4, Claude 3.5 Sonnet, Claude 3.5 Haiku, Claude 3 Opus, and all future releases.</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 20.0%"/>
|
||||
<col style="width: 50.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Model Name</p></th>
|
||||
<th class="head"><p>Model ID for Config</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p>Claude Sonnet 4</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">anthropic/claude-sonnet-4</span></code></p></td>
|
||||
<td><p>Next-generation model (use any model name from Anthropic’s API)</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>Claude 3.5 Sonnet</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">anthropic/claude-3-5-sonnet-20241022</span></code></p></td>
|
||||
<td><p>Latest high-performance model</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>Claude 3.5 Haiku</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">anthropic/claude-3-5-haiku-20241022</span></code></p></td>
|
||||
<td><p>Fast and efficient model</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>Claude 3 Opus</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">anthropic/claude-3-opus-20240229</span></code></p></td>
|
||||
<td><p>Most capable model for complex tasks</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>Claude 3 Sonnet</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">anthropic/claude-3-sonnet-20240229</span></code></p></td>
|
||||
<td><p>Balanced performance model</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>Claude 3 Haiku</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">anthropic/claude-3-haiku-20240307</span></code></p></td>
|
||||
<td><p>Fastest model</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p><strong>Configuration Examples:</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Latest models (examples - use any Anthropic chat model)</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-haiku-20241022</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-8">
|
||||
</span><span id="line-9"><span class="w"> </span><span class="c1"># Use any model name from Anthropic's API</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-sonnet-4</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="deepseek">
|
||||
<h3>DeepSeek<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#deepseek" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#deepseek'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">deepseek/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/v1/chat/completions</span></code></p>
|
||||
<p><strong>Authentication:</strong> API Key - Get your DeepSeek API key from <a class="reference external" href="https://platform.deepseek.com/api_keys" rel="nofollow noopener">DeepSeek Platform<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All DeepSeek chat models including DeepSeek-Chat, DeepSeek-Coder, and all future releases.</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 20.0%"/>
|
||||
<col style="width: 50.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Model Name</p></th>
|
||||
<th class="head"><p>Model ID for Config</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p>DeepSeek Chat</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">deepseek/deepseek-chat</span></code></p></td>
|
||||
<td><p>General purpose chat model</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>DeepSeek Coder</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">deepseek/deepseek-coder</span></code></p></td>
|
||||
<td><p>Code-specialized model</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p><strong>Configuration Examples:</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">deepseek/deepseek-chat</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$DEEPSEEK_API_KEY</span>
|
||||
</span><span id="line-4">
|
||||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">deepseek/deepseek-coder</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$DEEPSEEK_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="mistral-ai">
|
||||
<h3>Mistral AI<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#mistral-ai" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#mistral-ai'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">mistral/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/v1/chat/completions</span></code></p>
|
||||
<p><strong>Authentication:</strong> API Key - Get your Mistral API key from <a class="reference external" href="https://console.mistral.ai/api-keys/" rel="nofollow noopener">Mistral AI Console<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All Mistral chat models including Mistral Large, Mistral Small, Ministral, and all future releases.</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 20.0%"/>
|
||||
<col style="width: 50.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Model Name</p></th>
|
||||
<th class="head"><p>Model ID for Config</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p>Mistral Large</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">mistral/mistral-large-latest</span></code></p></td>
|
||||
<td><p>Most capable model</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>Mistral Medium</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">mistral/mistral-medium-latest</span></code></p></td>
|
||||
<td><p>Balanced performance</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>Mistral Small</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">mistral/mistral-small-latest</span></code></p></td>
|
||||
<td><p>Fast and efficient</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>Ministral 3B</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">mistral/ministral-3b-latest</span></code></p></td>
|
||||
<td><p>Compact model</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p><strong>Configuration Examples:</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral/mistral-large-latest</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_API_KEY</span>
|
||||
</span><span id="line-4">
|
||||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral/mistral-small-latest</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="groq">
|
||||
<h3>Groq<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#groq" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#groq'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">groq/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/openai/v1/chat/completions</span></code> (transformed internally)</p>
|
||||
<p><strong>Authentication:</strong> API Key - Get your Groq API key from <a class="reference external" href="https://console.groq.com/keys" rel="nofollow noopener">Groq Console<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All Groq chat models including Llama 3, Mixtral, Gemma, and all future releases.</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 20.0%"/>
|
||||
<col style="width: 50.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Model Name</p></th>
|
||||
<th class="head"><p>Model ID for Config</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p>Llama 3 8B</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">groq/llama3-8b-8192</span></code></p></td>
|
||||
<td><p>Fast inference Llama model</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>Llama 3 70B</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">groq/llama3-70b-8192</span></code></p></td>
|
||||
<td><p>Larger Llama model</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>Mixtral 8x7B</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">groq/mixtral-8x7b-32768</span></code></p></td>
|
||||
<td><p>Mixture of experts model</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p><strong>Configuration Examples:</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">groq/llama3-8b-8192</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$GROQ_API_KEY</span>
|
||||
</span><span id="line-4">
|
||||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">groq/mixtral-8x7b-32768</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$GROQ_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="google-gemini">
|
||||
<h3>Google Gemini<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#google-gemini" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#google-gemini'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">gemini/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/v1beta/openai/chat/completions</span></code> (transformed internally)</p>
|
||||
<p><strong>Authentication:</strong> API Key - Get your Google AI API key from <a class="reference external" href="https://aistudio.google.com/app/apikey" rel="nofollow noopener">Google AI Studio<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All Google Gemini chat models including Gemini 1.5 Pro, Gemini 1.5 Flash, and all future releases.</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 20.0%"/>
|
||||
<col style="width: 50.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Model Name</p></th>
|
||||
<th class="head"><p>Model ID for Config</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p>Gemini 1.5 Pro</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">gemini/gemini-1.5-pro</span></code></p></td>
|
||||
<td><p>Advanced reasoning and creativity</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>Gemini 1.5 Flash</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">gemini/gemini-1.5-flash</span></code></p></td>
|
||||
<td><p>Fast and efficient model</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p><strong>Configuration Examples:</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gemini/gemini-1.5-pro</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$GOOGLE_API_KEY</span>
|
||||
</span><span id="line-4">
|
||||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gemini/gemini-1.5-flash</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$GOOGLE_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="together-ai">
|
||||
<h3>Together AI<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#together-ai" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#together-ai'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">together_ai/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/v1/chat/completions</span></code></p>
|
||||
<p><strong>Authentication:</strong> API Key - Get your Together AI API key from <a class="reference external" href="https://api.together.xyz/settings/api-keys" rel="nofollow noopener">Together AI Settings<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All Together AI chat models including Llama, CodeLlama, Mixtral, Qwen, and hundreds of other open-source models.</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 20.0%"/>
|
||||
<col style="width: 50.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Model Name</p></th>
|
||||
<th class="head"><p>Model ID for Config</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p>Meta Llama 2 7B</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">together_ai/meta-llama/Llama-2-7b-chat-hf</span></code></p></td>
|
||||
<td><p>Open source chat model</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>Meta Llama 2 13B</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">together_ai/meta-llama/Llama-2-13b-chat-hf</span></code></p></td>
|
||||
<td><p>Larger open source model</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>Code Llama 34B</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">together_ai/codellama/CodeLlama-34b-Instruct-hf</span></code></p></td>
|
||||
<td><p>Code-specialized model</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p><strong>Configuration Examples:</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">together_ai/meta-llama/Llama-2-7b-chat-hf</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$TOGETHER_API_KEY</span>
|
||||
</span><span id="line-4">
|
||||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">together_ai/codellama/CodeLlama-34b-Instruct-hf</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$TOGETHER_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="xai">
|
||||
<h3>xAI<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#xai" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#xai'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">xai/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/v1/chat/completions</span></code></p>
|
||||
<p><strong>Authentication:</strong> API Key - Get your xAI API key from <a class="reference external" href="https://console.x.ai/" rel="nofollow noopener">xAI Console<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All xAI chat models including Grok Beta and all future releases.</p>
|
||||
<table class="docutils align-default">
|
||||
<colgroup>
|
||||
<col style="width: 30.0%"/>
|
||||
<col style="width: 20.0%"/>
|
||||
<col style="width: 50.0%"/>
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Model Name</p></th>
|
||||
<th class="head"><p>Model ID for Config</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p>Grok Beta</p></td>
|
||||
<td><p><code class="docutils literal notranslate"><span class="pre">xai/grok-beta</span></code></p></td>
|
||||
<td><p>Conversational AI model</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p><strong>Configuration Examples:</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">xai/grok-beta</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$XAI_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="providers-requiring-base-url">
|
||||
<h2>Providers Requiring Base URL<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#providers-requiring-base-url" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#providers-requiring-base-url'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<section id="azure-openai">
|
||||
<h3>Azure OpenAI<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#azure-openai" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#azure-openai'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">azure_openai/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/openai/deployments/{deployment-name}/chat/completions</span></code> (constructed automatically)</p>
|
||||
<p><strong>Authentication:</strong> API Key + Base URL - Get your Azure OpenAI API key from <a class="reference external" href="https://portal.azure.com/" rel="nofollow noopener">Azure Portal<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> → Your OpenAI Resource → Keys and Endpoint.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All Azure OpenAI chat models that are deployed in your Azure subscription, including GPT-4o, GPT-4, and GPT-3.5-turbo.</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Single deployment</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">azure_openai/gpt-4o</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$AZURE_OPENAI_API_KEY</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">https://your-resource.openai.azure.com</span>
|
||||
</span><span id="line-6">
|
||||
</span><span id="line-7"><span class="w"> </span><span class="c1"># Multiple deployments</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">azure_openai/gpt-4o-mini</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$AZURE_OPENAI_API_KEY</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">https://your-resource.openai.azure.com</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="ollama">
|
||||
<h3>Ollama<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#ollama" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#ollama'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Provider Prefix:</strong> <code class="docutils literal notranslate"><span class="pre">ollama/</span></code></p>
|
||||
<p><strong>API Endpoint:</strong> <code class="docutils literal notranslate"><span class="pre">/v1/chat/completions</span></code> (Ollama’s OpenAI-compatible endpoint)</p>
|
||||
<p><strong>Authentication:</strong> None (Base URL only) - Install Ollama from <a class="reference external" href="https://ollama.com/" rel="nofollow noopener">Ollama.com<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> and pull your desired models.</p>
|
||||
<p><strong>Supported Chat Models:</strong> All chat models available in your local Ollama installation. Use <code class="docutils literal notranslate"><span class="pre">ollama</span> <span class="pre">list</span></code> to see installed models.</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Local Ollama installation</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">ollama/llama3.1</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://localhost:11434</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="w"> </span><span class="c1"># Ollama running on the host, reached from inside a Docker container</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">ollama/codellama</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:11434</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="openai-compatible-providers">
|
||||
<h3>OpenAI-Compatible Providers<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#openai-compatible-providers" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#openai-compatible-providers'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p><strong>Supported Models:</strong> Any chat models from providers that implement the OpenAI Chat Completions API standard.</p>
|
||||
<p>For providers that implement the OpenAI API but aren’t natively supported:</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Generic OpenAI-compatible provider</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">custom-provider/custom-model</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">https://api.customprovider.com</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">provider_interface</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$CUSTOM_API_KEY</span>
|
||||
</span><span id="line-7">
|
||||
</span><span id="line-8"><span class="w"> </span><span class="c1"># Local deployment</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">local/llama2-7b</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://localhost:8000</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">provider_interface</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="advanced-configuration">
|
||||
<h2>Advanced Configuration<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#advanced-configuration" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#advanced-configuration'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<section id="multiple-provider-instances">
|
||||
<h3>Multiple Provider Instances<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#multiple-provider-instances" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#multiple-provider-instances'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Configure multiple instances of the same provider:</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Production OpenAI</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_PROD_KEY</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai-prod</span>
|
||||
</span><span id="line-6">
|
||||
</span><span id="line-7"><span class="w"> </span><span class="c1"># Development OpenAI (different key/quota)</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_DEV_KEY</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai-dev</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="default-model-configuration">
|
||||
<h3>Default Model Configuration<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#default-model-configuration" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#default-model-configuration'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Mark one model as the default for fallback scenarios:</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Used when no specific model is requested</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="routing-preferences">
|
||||
<h3>Routing Preferences<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#routing-preferences" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#routing-preferences'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Configure routing preferences for dynamic model selection:</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">complex_reasoning</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">deep analysis, mathematical problem solving, and logical reasoning</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">code_review</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reviewing and analyzing existing code for bugs and improvements</span>
|
||||
</span><span id="line-9">
|
||||
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative_writing</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative content generation, storytelling, and writing assistance</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="model-selection-guidelines">
|
||||
<h2>Model Selection Guidelines<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-selection-guidelines" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#model-selection-guidelines'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>For Production Applications:</strong></p><ul class="simple">
|
||||
<li><p><strong>High Performance</strong>: OpenAI GPT-4o, Anthropic Claude 3.5 Sonnet</p></li>
|
||||
<li><p><strong>Cost-Effective</strong>: OpenAI GPT-4o mini, Anthropic Claude 3.5 Haiku</p></li>
|
||||
<li><p><strong>Code Tasks</strong>: DeepSeek Coder, Together AI Code Llama</p></li>
|
||||
<li><p><strong>Local Deployment</strong>: Ollama with Llama 3.1 or Code Llama</p></li></ul>
|
||||
<p><strong>For Development/Testing:</strong></p><ul class="simple">
|
||||
<li><p><strong>Fast Iteration</strong>: Groq models (optimized inference)</p></li>
|
||||
<li><p><strong>Local Testing</strong>: Ollama models</p></li>
|
||||
<li><p><strong>Cost Control</strong>: Smaller models like GPT-4o mini or Mistral Small</p></li></ul>
|
||||
</section>
|
||||
<section id="see-also">
|
||||
<h2>See Also<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#see-also" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#see-also'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<ul class="simple">
|
||||
<li><p><a class="reference internal" href="client_libraries.html#client-libraries"><span class="std std-ref">Client Libraries</span></a> - Using different client libraries with providers</p></li>
|
||||
<li><p><a class="reference internal" href="model_aliases.html#model-aliases"><span class="std std-ref">Model Aliases</span></a> - Creating semantic model names</p></li>
|
||||
<li><p><a class="reference internal" href="../../guides/llm_router.html#llm-router"><span class="std std-ref">LLM Routing</span></a> - Setting up intelligent routing</p></li>
|
||||
<li><p><a class="reference internal" href="client_libraries.html#client-libraries"><span class="std std-ref">Client Libraries</span></a> - Using different client libraries</p></li>
|
||||
<li><p><a class="reference internal" href="model_aliases.html#model-aliases"><span class="std std-ref">Model Aliases</span></a> - Creating semantic model names</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="llm_providers.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
LLM Providers
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="client_libraries.html">
|
||||
Client Libraries
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
|
||||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
<li><a :data-current="activeSection === '#configuration-structure'" class="reference internal" href="#configuration-structure">Configuration Structure</a></li>
|
||||
<li><a :data-current="activeSection === '#provider-categories'" class="reference internal" href="#provider-categories">Provider Categories</a></li>
|
||||
<li><a :data-current="activeSection === '#supported-api-endpoints'" class="reference internal" href="#supported-api-endpoints">Supported API Endpoints</a></li>
|
||||
<li><a :data-current="activeSection === '#first-class-providers'" class="reference internal" href="#first-class-providers">First-Class Providers</a><ul>
|
||||
<li><a :data-current="activeSection === '#openai'" class="reference internal" href="#openai">OpenAI</a></li>
|
||||
<li><a :data-current="activeSection === '#anthropic'" class="reference internal" href="#anthropic">Anthropic</a></li>
|
||||
<li><a :data-current="activeSection === '#deepseek'" class="reference internal" href="#deepseek">DeepSeek</a></li>
|
||||
<li><a :data-current="activeSection === '#mistral-ai'" class="reference internal" href="#mistral-ai">Mistral AI</a></li>
|
||||
<li><a :data-current="activeSection === '#groq'" class="reference internal" href="#groq">Groq</a></li>
|
||||
<li><a :data-current="activeSection === '#google-gemini'" class="reference internal" href="#google-gemini">Google Gemini</a></li>
|
||||
<li><a :data-current="activeSection === '#together-ai'" class="reference internal" href="#together-ai">Together AI</a></li>
|
||||
<li><a :data-current="activeSection === '#xai'" class="reference internal" href="#xai">xAI</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a :data-current="activeSection === '#providers-requiring-base-url'" class="reference internal" href="#providers-requiring-base-url">Providers Requiring Base URL</a><ul>
|
||||
<li><a :data-current="activeSection === '#azure-openai'" class="reference internal" href="#azure-openai">Azure OpenAI</a></li>
|
||||
<li><a :data-current="activeSection === '#ollama'" class="reference internal" href="#ollama">Ollama</a></li>
|
||||
<li><a :data-current="activeSection === '#openai-compatible-providers'" class="reference internal" href="#openai-compatible-providers">OpenAI-Compatible Providers</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a :data-current="activeSection === '#advanced-configuration'" class="reference internal" href="#advanced-configuration">Advanced Configuration</a><ul>
|
||||
<li><a :data-current="activeSection === '#multiple-provider-instances'" class="reference internal" href="#multiple-provider-instances">Multiple Provider Instances</a></li>
|
||||
<li><a :data-current="activeSection === '#default-model-configuration'" class="reference internal" href="#default-model-configuration">Default Model Configuration</a></li>
|
||||
<li><a :data-current="activeSection === '#routing-preferences'" class="reference internal" href="#routing-preferences">Routing Preferences</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a :data-current="activeSection === '#model-selection-guidelines'" class="reference internal" href="#model-selection-guidelines">Model Selection Guidelines</a></li>
|
||||
<li><a :data-current="activeSection === '#see-also'" class="reference internal" href="#see-also">See Also</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</aside>
|
||||
</main>
|
||||
</div>
|
||||
</div><footer class="py-6 border-t border-border md:py-0">
|
||||
<div class="container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row">
|
||||
<div class="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
|
||||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2025, Katanemo Labs, Inc. Last updated: Sep 19, 2025. </p>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
<script src="../../_static/documentation_options.js?v=a9d256b5"></script>
|
||||
<script src="../../_static/doctools.js?v=9bcbadda"></script>
|
||||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script defer="defer" src="../../_static/theme.js?v=073f68d9"></script>
|
||||
<script src="../../_static/design-tabs.js?v=f930bc37"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -19,7 +19,7 @@
|
|||
<link href="../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../search.html" rel="search" title="Search"/>
|
||||
<link href="../guides/prompt_guard.html" rel="next" title="Prompt Guard"/>
|
||||
<link href="llm_provider.html" rel="prev" title="LLM Provider"/>
|
||||
<link href="llm_providers/model_aliases.html" rel="prev" title="Model Aliases"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="#">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
@ -323,11 +328,11 @@ Each parameter can be marked as required or optional. Here is a full list of par
|
|||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="llm_provider.html">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="llm_providers/model_aliases.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
LLM Provider
|
||||
Model Aliases
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
<link href="./docs/concepts/tech_overview/error_target.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="../llm_provider.html" rel="next" title="LLM Provider"/>
|
||||
<link href="../llm_providers/llm_providers.html" rel="next" title="LLM Providers"/>
|
||||
<link href="request_lifecycle.html" rel="prev" title="Request Lifecycle"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
@ -212,8 +217,8 @@ The errors are communicated to the application via headers like <code class="doc
|
|||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="../llm_provider.html">
|
||||
LLM Provider
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="../llm_providers/llm_providers.html">
|
||||
LLM Providers
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
@ -176,7 +181,7 @@ see <a class="reference internal" href="prompt.html#arch-overview-prompt-handlin
|
|||
<p>Arch automatically configures a listener to route requests from your application to upstream LLM API providers (or hosts).
|
||||
When you start Arch, it creates a listener for egress traffic based on the presence of the <code class="docutils literal notranslate"><span class="pre">listener</span></code> configuration
|
||||
section in the configuration file. Arch binds itself to a local address such as <code class="docutils literal notranslate"><span class="pre">127.0.0.1:12000/v1</span></code> or a DNS-based
|
||||
address like <code class="docutils literal notranslate"><span class="pre">arch.local:12000/v1</span></code> for outgoing traffic. For more details on LLM providers, read <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">here</span></a>.</p>
|
||||
address like <code class="docutils literal notranslate"><span class="pre">arch.local:12000/v1</span></code> for outgoing traffic. For more details on LLM providers, read <a class="reference internal" href="../llm_providers/llm_providers.html#llm-providers"><span class="std std-ref">here</span></a>.</p>
|
||||
</section>
|
||||
<section id="configure-listener">
|
||||
<h2>Configure Listener<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#configure-listener" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#configure-listener'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
@ -178,7 +183,7 @@ code to LLMs.</p>
|
|||
<p class="admonition-title">Note</p>
|
||||
<p>When you start Arch, you specify a listener address/port that you want to bind downstream. But Arch also uses a predefined port
|
||||
that you can use (<code class="docutils literal notranslate"><span class="pre">127.0.0.1:12000</span></code>) to proxy egress calls originating from your application to LLMs (API-based or hosted).
|
||||
For more details, check out <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">LLM provider</span></a>.</p>
|
||||
For more details, check out <a class="reference internal" href="../llm_providers/llm_providers.html#llm-providers"><span class="std std-ref">LLM providers</span></a>.</p>
|
||||
</div>
|
||||
<p><strong>Prompt Target</strong>: Arch offers a primitive called <a class="reference internal" href="../prompt_target.html#prompt-target"><span class="std std-ref">prompt target</span></a> to help separate business logic from
|
||||
undifferentiated work in building generative AI apps. Prompt targets are endpoints that receive prompts that are processed by Arch.
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
@ -157,8 +162,8 @@
|
|||
<div id="content" role="main">
|
||||
<section id="overview">
|
||||
<span id="id1"></span><h1>Overview<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#overview"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p><a class="reference external" href="https://github.com/katanemo/arch" rel="nofollow noopener">Arch<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> is a smart edge and AI gateway for AI-native apps - one that is natively designed to handle and process prompts, not just network traffic.</p>
|
||||
<p>Built by contributors to the widely adopted <a class="reference external" href="https://www.envoyproxy.io/" rel="nofollow noopener">Envoy Proxy<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>, Arch handles the <em>pesky low-level work</em> in building agentic apps — like applying guardrails, clarifying vague user input, routing prompts to the right agent, and unifying access to any LLM. It’s a language and framework friendly infrastructure layer designed to help you build and ship agentic apps faster.</p>
|
||||
<p><a class="reference external" href="https://github.com/katanemo/arch" rel="nofollow noopener">Arch<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> is a smart edge and AI gateway for AI agents - one that is natively designed to handle and process prompts, not just network traffic.</p>
|
||||
<p>Built by contributors to the widely adopted <a class="reference external" href="https://www.envoyproxy.io/" rel="nofollow noopener">Envoy Proxy<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>, Arch handles the <em>pesky low-level work</em> in building agentic apps — like applying guardrails, clarifying vague user input, routing prompts to the right agent, and unifying access to any LLM. It’s a protocol-friendly and framework-agnostic infrastructure layer designed to help you build and ship agentic apps faster.</p>
|
||||
<p>In this documentation, you will learn how to quickly set up Arch to trigger API calls via prompts, apply prompt guardrails without writing any application-level logic,
|
||||
simplify the interaction with upstream LLMs, and improve observability all while simplifying your application development process.</p>
|
||||
<figure class="align-center" id="id2">
|
||||
|
|
@ -221,10 +226,10 @@ simplify the interaction with upstream LLMs, and improve observability all while
|
|||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-webhook" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M5.5 4.25a2.25 2.25 0 0 1 4.5 0 .75.75 0 0 0 1.5 0 3.75 3.75 0 1 0-6.14 2.889l-2.272 4.258a.75.75 0 0 0 1.324.706L7 7.25a.75.75 0 0 0-.309-1.015A2.25 2.25 0 0 1 5.5 4.25Z"></path><path d="M7.364 3.607a.75.75 0 0 1 1.03.257l2.608 4.349a3.75 3.75 0 1 1-.628 6.785.75.75 0 0 1 .752-1.299 2.25 2.25 0 1 0-.033-3.88.75.75 0 0 1-1.03-.256L7.107 4.636a.75.75 0 0 1 .257-1.03Z"></path><path d="M2.9 8.776A.75.75 0 0 1 2.625 9.8 2.25 2.25 0 1 0 6 11.75a.75.75 0 0 1 .75-.751h5.5a.75.75 0 0 1 0 1.5H7.425a3.751 3.751 0 1 1-5.55-3.998.75.75 0 0 1 1.024.274Z"></path></svg> LLM Provider</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-webhook" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M5.5 4.25a2.25 2.25 0 0 1 4.5 0 .75.75 0 0 0 1.5 0 3.75 3.75 0 1 0-6.14 2.889l-2.272 4.258a.75.75 0 0 0 1.324.706L7 7.25a.75.75 0 0 0-.309-1.015A2.25 2.25 0 0 1 5.5 4.25Z"></path><path d="M7.364 3.607a.75.75 0 0 1 1.03.257l2.608 4.349a3.75 3.75 0 1 1-.628 6.785.75.75 0 0 1 .752-1.299 2.25 2.25 0 1 0-.033-3.88.75.75 0 0 1-1.03-.256L7.107 4.636a.75.75 0 0 1 .257-1.03Z"></path><path d="M2.9 8.776A.75.75 0 0 1 2.625 9.8 2.25 2.25 0 1 0 6 11.75a.75.75 0 0 1 .75-.751h5.5a.75.75 0 0 1 0 1.5H7.425a3.751 3.751 0 1 1-5.55-3.998.75.75 0 0 1 1.024.274Z"></path></svg> LLM Providers</div>
|
||||
<p class="sd-card-text">Explore Arch’s LLM integration options</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../concepts/llm_provider.html"><span>../concepts/llm_provider.html</span></a></div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../concepts/llm_providers/llm_providers.html"><span>../concepts/llm_providers/llm_providers.html</span></a></div>
|
||||
</div>
|
||||
<div class="sd-col sd-d-flex-row docutils">
|
||||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
@ -296,7 +301,7 @@ It will automatically validate parameters, and ensure that the required paramete
|
|||
</figcaption>
|
||||
</figure>
|
||||
<p>Once a downstream function (API) is called, Arch Gateway takes the response and sends it to an upstream LLM to complete the request (for summarization, Q/A, text generation tasks).
|
||||
For more details on how Arch Gateway enables you to centralize usage of LLMs, please read <a class="reference internal" href="../concepts/llm_provider.html#llm-provider"><span class="std std-ref">LLM providers</span></a>.</p>
|
||||
For more details on how Arch Gateway enables you to centralize usage of LLMs, please read <a class="reference internal" href="../concepts/llm_providers/llm_providers.html#llm-providers"><span class="std std-ref">LLM providers</span></a>.</p>
|
||||
<p>By completing these steps, you enable Arch to manage the process from validation to response, ensuring users receive consistent, reliable results - and that you are focused
|
||||
on the stuff that matters most.</p>
|
||||
</section>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
@ -158,15 +163,111 @@
|
|||
<section id="llm-routing">
|
||||
<span id="llm-router"></span><h1>LLM Routing<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#llm-routing"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>With the rapid proliferation of large language models (LLMs) — each optimized for different strengths, style, or latency/cost profile — routing has become an essential technique to operationalize the use of different models.</p>
|
||||
<p>Arch-Router is an intelligent routing system that automatically selects the most appropriate LLM for each user request based on user-defined usage preferences. Specifically, Arch-Router guides model selection by matching queries to user-defined domains (e.g., finance and healthcare) and action types (e.g., code generation and image editing).
|
||||
Our preference-aligned approach matches practical definitions of performance in the real world and makes routing decisions more transparent and adaptable.</p>
|
||||
<p>Arch provides three distinct routing approaches to meet different use cases:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p><strong>Model-based Routing</strong>: Direct routing to specific models using provider/model names</p></li>
|
||||
<li><p><strong>Alias-based Routing</strong>: Semantic routing using custom aliases that map to underlying models</p></li>
|
||||
<li><p><strong>Preference-aligned Routing</strong>: Intelligent routing using the Arch-Router model based on context and user-defined preferences</p></li>
|
||||
</ol>
|
||||
<p>This enables optimal performance, cost efficiency, and response quality by matching requests with the most suitable model from your available LLM fleet.</p>
|
||||
<section id="routing-workflow">
|
||||
<h2>Routing Workflow<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#routing-workflow" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#routing-workflow'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<section id="routing-methods">
|
||||
<h2>Routing Methods<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#routing-methods" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#routing-methods'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<section id="model-based-routing">
|
||||
<h3>Model-based Routing<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-based-routing" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#model-based-routing'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Direct routing allows you to specify exact provider and model combinations using the format <code class="docutils literal notranslate"><span class="pre">provider/model-name</span></code>:</p>
|
||||
<ul class="simple">
|
||||
<li><p>Use provider-specific names like <code class="docutils literal notranslate"><span class="pre">openai/gpt-4o</span></code> or <code class="docutils literal notranslate"><span class="pre">anthropic/claude-3-5-sonnet-20241022</span></code></p></li>
|
||||
<li><p>Provides full control and transparency over which model handles each request</p></li>
|
||||
<li><p>Ideal for production workloads where you want predictable routing behavior</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="alias-based-routing">
|
||||
<h3>Alias-based Routing<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#alias-based-routing" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#alias-based-routing'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Alias-based routing lets you create semantic model names that decouple your application from specific providers:</p>
|
||||
<ul class="simple">
|
||||
<li><p>Use meaningful names like <code class="docutils literal notranslate"><span class="pre">fast-model</span></code>, <code class="docutils literal notranslate"><span class="pre">reasoning-model</span></code>, or <code class="docutils literal notranslate"><span class="pre">arch.summarize.v1</span></code> (see <a class="reference internal" href="../concepts/llm_providers/model_aliases.html#model-aliases"><span class="std std-ref">Model Aliases</span></a>)</p></li>
|
||||
<li><p>Maps semantic names to underlying provider models for easier experimentation and provider switching</p></li>
|
||||
<li><p>Ideal for applications that want abstraction from specific model names while maintaining control</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="preference-aligned-routing-arch-router">
|
||||
<span id="preference-aligned-routing"></span><h3>Preference-aligned Routing (Arch-Router)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#preference-aligned-routing-arch-router" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#preference-aligned-routing-arch-router'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Traditional LLM routing approaches face significant limitations: they evaluate performance using benchmarks that often fail to capture human preferences, select from fixed model pools, and operate as “black boxes” without practical mechanisms for encoding user preferences.</p>
|
||||
<p>Arch’s preference-aligned routing addresses these challenges by applying a fundamental engineering principle: decoupling. The framework separates route selection (matching queries to human-readable policies) from model assignment (mapping policies to specific LLMs). This separation allows you to define routing policies using descriptive labels like <code class="docutils literal notranslate"><span class="pre">Domain:</span> <span class="pre">'finance',</span> <span class="pre">Action:</span> <span class="pre">'analyze_earnings_report'</span></code> rather than cryptic identifiers, while independently configuring which models handle each policy.</p>
|
||||
<p>The <a class="reference external" href="https://huggingface.co/katanemo/Arch-Router-1.5B" rel="nofollow noopener">Arch-Router<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> model automatically selects the most appropriate LLM based on:</p>
|
||||
<ul class="simple">
|
||||
<li><p>Domain Analysis: Identifies the subject matter (e.g., legal, healthcare, programming)</p></li>
|
||||
<li><p>Action Classification: Determines the type of operation (e.g., summarization, code generation, translation)</p></li>
|
||||
<li><p>User-Defined Preferences: Maps domains and actions to preferred models using transparent, configurable routing decisions</p></li>
|
||||
<li><p>Human Preference Alignment: Uses domain-action mappings that capture subjective evaluation criteria, ensuring routing aligns with real-world user needs rather than just benchmark scores</p></li>
|
||||
</ul>
|
||||
<p>This approach supports seamlessly adding new models without retraining and is ideal for dynamic, context-aware routing that adapts to request content and intent.</p>
|
||||
</section>
|
||||
</section>
|
||||
<section id="model-based-routing-workflow">
|
||||
<h2>Model-based Routing Workflow<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-based-routing-workflow" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#model-based-routing-workflow'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>For direct model routing, the process is straightforward:</p>
|
||||
<ol class="arabic">
|
||||
<li><p><strong>Client Request</strong></p>
|
||||
<blockquote>
|
||||
<div><p>The client specifies the exact model using provider/model format (<code class="docutils literal notranslate"><span class="pre">openai/gpt-4o</span></code>).</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><strong>Provider Validation</strong></p>
|
||||
<blockquote>
|
||||
<div><p>Arch validates that the specified provider and model are configured and available.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><strong>Direct Routing</strong></p>
|
||||
<blockquote>
|
||||
<div><p>The request is sent directly to the specified model without analysis or decision-making.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><strong>Response Handling</strong></p>
|
||||
<blockquote>
|
||||
<div><p>The response is returned to the client with optional metadata about the routing decision.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
</ol>
|
||||
</section>
|
||||
<section id="alias-based-routing-workflow">
|
||||
<h2>Alias-based Routing Workflow<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#alias-based-routing-workflow" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#alias-based-routing-workflow'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>For alias-based routing, the process includes name resolution:</p>
|
||||
<ol class="arabic">
|
||||
<li><p><strong>Client Request</strong></p>
|
||||
<blockquote>
|
||||
<div><p>The client specifies a semantic alias name (<code class="docutils literal notranslate"><span class="pre">reasoning-model</span></code>).</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><strong>Alias Resolution</strong></p>
|
||||
<blockquote>
|
||||
<div><p>Arch resolves the alias to the actual provider/model name based on configuration.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><strong>Model Selection</strong></p>
|
||||
<blockquote>
|
||||
<div><p>If the alias maps to multiple models, Arch selects one based on availability and load balancing.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><strong>Request Forwarding</strong></p>
|
||||
<blockquote>
|
||||
<div><p>The request is forwarded to the resolved model.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><strong>Response Handling</strong></p>
|
||||
<blockquote>
|
||||
<div><p>The response is returned with optional metadata about the alias resolution.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
</ol>
|
||||
</section>
|
||||
<section id="preference-aligned-routing-workflow-arch-router">
|
||||
<span id="preference-aligned-routing-workflow"></span><h2>Preference-aligned Routing Workflow (Arch-Router)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#preference-aligned-routing-workflow-arch-router" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#preference-aligned-routing-workflow-arch-router'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>For preference-aligned dynamic routing, the process involves intelligent analysis:</p>
|
||||
<ol class="arabic">
|
||||
<li><p><strong>Prompt Analysis</strong></p>
|
||||
<blockquote>
|
||||
<div><p>When a user submits a prompt, the Router analyzes it to determine the domain (subject matter) or action (type of operation requested).</p>
|
||||
<div><p>When a user submits a prompt without specifying a model, the Arch-Router analyzes it to determine the domain (subject matter) and action (type of operation requested).</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><strong>Model Selection</strong></p>
|
||||
|
|
@ -186,9 +287,16 @@ Our preference-aligned approach matches practical definitions of performance in
|
|||
</li>
|
||||
</ol>
|
||||
</section>
|
||||
<section id="arch-router">
|
||||
<h2>Arch-Router<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#arch-router" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#arch-router'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>The <a class="reference external" href="https://huggingface.co/katanemo/Arch-Router-1.5B" rel="nofollow noopener">Arch-Router<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> is a state-of-the-art <strong>preference-based routing model</strong> specifically designed for intelligent LLM selection. This model delivers production-ready performance with low latency and high accuracy.</p>
|
||||
<section id="id1">
|
||||
<h2>Arch-Router<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#id1'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>The <a class="reference external" href="https://huggingface.co/katanemo/Arch-Router-1.5B" rel="nofollow noopener">Arch-Router<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> is a state-of-the-art <strong>preference-based routing model</strong> specifically designed to address the limitations of traditional LLM routing. This compact 1.5B model delivers production-ready performance with low latency and high accuracy while solving key routing challenges.</p>
|
||||
<p><strong>Addressing Traditional Routing Limitations:</strong></p>
|
||||
<p><strong>Human Preference Alignment</strong>
|
||||
Unlike benchmark-driven approaches, Arch-Router learns to match queries with human preferences by using domain-action mappings that capture subjective evaluation criteria, ensuring routing decisions align with real-world user needs.</p>
|
||||
<p><strong>Flexible Model Integration</strong>
|
||||
The system supports seamlessly adding new models for routing without requiring retraining or architectural modifications, enabling dynamic adaptation to evolving model landscapes.</p>
|
||||
<p><strong>Preference-Encoded Routing</strong>
|
||||
Provides a practical mechanism to encode user preferences through domain-action mappings, offering transparent and controllable routing decisions that can be customized for specific use cases.</p>
|
||||
<p>To support effective routing, Arch-Router introduces two key concepts:</p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>Domain</strong> – the high-level thematic category or subject matter of a request (e.g., legal, healthcare, programming).</p></li>
|
||||
|
|
@ -203,48 +311,176 @@ Our preference-aligned approach matches practical definitions of performance in
|
|||
<li><p><strong>Production-Ready Performance</strong>: Optimized for low-latency, high-throughput applications in multi-model environments.</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="implementing-llm-routing">
|
||||
<h2>Implementing LLM Routing<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#implementing-llm-routing" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#implementing-llm-routing'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>To configure LLM routing in our gateway, you need to define a prompt target configuration that specifies the routing model and the LLM providers. This configuration will allow Arch Gateway to route incoming prompts to the appropriate model based on the defined routes.</p>
|
||||
<p>Below is an example to show how to set up a prompt target for the Arch Router:</p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>Step 1: Define the routing model in the `routing` section</strong>. You can use the <cite>archgw-v1-router-model</cite> as the katanemo routing model or any other routing model you prefer.</p></li>
|
||||
<li><p><strong>Step 2: Define the listeners in the `listeners` section</strong>. This is where you specify the address and port for incoming traffic, as well as the message format (e.g., OpenAI).</p></li>
|
||||
<li><p><strong>Step 3: Define the LLM providers in the `llm_providers` section</strong>. This is where you specify the routing model, and any other models you want to use for specific tasks and their route usage descriptions (e.g., code generation, code understanding).</p></li>
|
||||
</ul>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>Make sure you define a model for default usage, such as <cite>gpt-4o</cite>, which will be used when no specific route is matched for a user prompt.</p>
|
||||
</div>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Route Config Example</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<section id="implementing-routing">
|
||||
<h2>Implementing Routing<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#implementing-routing" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#implementing-routing'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>Model-based Routing</strong></p>
|
||||
<p>For direct model routing, configure your LLM providers with specific provider/model names:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id3">
|
||||
<div class="code-block-caption"><span class="caption-text">Model-based Routing Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">listeners</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="nt">egress_traffic</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">egress_traffic</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">12000</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">30s</span>
|
||||
</span><span id="line-7">
|
||||
</span><span id="line-8"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-9">
|
||||
</span><span id="line-10"><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-13">
|
||||
</span><span id="line-14"><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-17"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">code understanding</span>
|
||||
</span><span id="line-18"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">understand and explain existing code snippets, functions, or libraries</span>
|
||||
</span><span id="line-19">
|
||||
</span><span id="line-20"><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4.1</span>
|
||||
</span><span id="line-21"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-22"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-23"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">code generation</span>
|
||||
</span><span id="line-24"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">generating new code snippets, functions, or boilerplate based on user prompts or requirements</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-12">
|
||||
</span><span id="line-13"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-15">
|
||||
</span><span id="line-16"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||||
</span><span id="line-17"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<p>Clients specify exact models:</p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Direct provider/model specification</span>
|
||||
</span><span id="line-2"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-3"> <span class="n">model</span><span class="o">=</span><span class="s2">"openai/gpt-4o-mini"</span><span class="p">,</span>
|
||||
</span><span id="line-4"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Hello!"</span><span class="p">}]</span>
|
||||
</span><span id="line-5"><span class="p">)</span>
|
||||
</span><span id="line-6">
|
||||
</span><span id="line-7"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-8"> <span class="n">model</span><span class="o">=</span><span class="s2">"anthropic/claude-3-5-sonnet-20241022"</span><span class="p">,</span>
|
||||
</span><span id="line-9"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Write a story"</span><span class="p">}]</span>
|
||||
</span><span id="line-10"><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Alias-based Routing</strong></p>
|
||||
<p>Configure semantic aliases that map to underlying models:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id4">
|
||||
<div class="code-block-caption"><span class="caption-text">Alias-based Routing Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">listeners</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">egress_traffic</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">12000</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">30s</span>
|
||||
</span><span id="line-7">
|
||||
</span><span id="line-8"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-11">
|
||||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-14">
|
||||
</span><span id="line-15"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-17">
|
||||
</span><span id="line-18"><span class="nt">model_aliases</span><span class="p">:</span>
|
||||
</span><span id="line-19"><span class="w"> </span><span class="c1"># Model aliases - friendly names that map to actual provider names</span>
|
||||
</span><span id="line-20"><span class="w"> </span><span class="nt">fast-model</span><span class="p">:</span>
|
||||
</span><span id="line-21"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||||
</span><span id="line-22">
|
||||
</span><span id="line-23"><span class="w"> </span><span class="nt">reasoning-model</span><span class="p">:</span>
|
||||
</span><span id="line-24"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-25">
|
||||
</span><span id="line-26"><span class="w"> </span><span class="nt">creative-model</span><span class="p">:</span>
|
||||
</span><span id="line-27"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<p>Clients use semantic names:</p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Using semantic aliases</span>
|
||||
</span><span id="line-2"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-3"> <span class="n">model</span><span class="o">=</span><span class="s2">"fast-model"</span><span class="p">,</span> <span class="c1"># Routes to best available fast model</span>
|
||||
</span><span id="line-4"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Quick summary please"</span><span class="p">}]</span>
|
||||
</span><span id="line-5"><span class="p">)</span>
|
||||
</span><span id="line-6">
|
||||
</span><span id="line-7"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-8"> <span class="n">model</span><span class="o">=</span><span class="s2">"reasoning-model"</span><span class="p">,</span> <span class="c1"># Routes to best reasoning model</span>
|
||||
</span><span id="line-9"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Solve this complex problem"</span><span class="p">}]</span>
|
||||
</span><span id="line-10"><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p><strong>Preference-aligned Routing (Arch-Router)</strong></p>
|
||||
<p>To configure preference-aligned dynamic routing, you need to define routing preferences that map domains and actions to specific models:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id5">
|
||||
<div class="code-block-caption"><span class="caption-text">Preference-Aligned Dynamic Routing Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id5"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">listeners</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">egress_traffic</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">12000</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">30s</span>
|
||||
</span><span id="line-7">
|
||||
</span><span id="line-8"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-12">
|
||||
</span><span id="line-13"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">code understanding</span>
|
||||
</span><span id="line-17"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">understand and explain existing code snippets, functions, or libraries</span>
|
||||
</span><span id="line-18"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">complex reasoning</span>
|
||||
</span><span id="line-19"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">deep analysis, mathematical problem solving, and logical reasoning</span>
|
||||
</span><span id="line-20">
|
||||
</span><span id="line-21"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||||
</span><span id="line-22"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-23"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-24"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative writing</span>
|
||||
</span><span id="line-25"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative content generation, storytelling, and writing assistance</span>
|
||||
</span><span id="line-26"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">code generation</span>
|
||||
</span><span id="line-27"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">generating new code snippets, functions, or boilerplate based on user prompts</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<p>Clients can let the router decide or use aliases:</p>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="c1"># Let Arch-Router choose based on content</span>
|
||||
</span><span id="line-2"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
</span><span id="line-3"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Write a creative story about space exploration"</span><span class="p">}]</span>
|
||||
</span><span id="line-4"> <span class="c1"># No model specified - router will analyze and choose claude-3-5-sonnet-20241022</span>
|
||||
</span><span id="line-5"><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="combining-routing-methods">
|
||||
<h2>Combining Routing Methods<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#combining-routing-methods" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#combining-routing-methods'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>You can combine static model selection with dynamic routing preferences for maximum flexibility:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id6">
|
||||
<div class="code-block-caption"><span class="caption-text">Hybrid Routing Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id6"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">complex_reasoning</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">deep analysis and complex problem solving</span>
|
||||
</span><span id="line-11">
|
||||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative_tasks</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative writing and content generation</span>
|
||||
</span><span id="line-17">
|
||||
</span><span id="line-18"><span class="nt">model_aliases</span><span class="p">:</span>
|
||||
</span><span id="line-19"><span class="w"> </span><span class="c1"># Model aliases - friendly names that map to actual provider names</span>
|
||||
</span><span id="line-20"><span class="w"> </span><span class="nt">fast-model</span><span class="p">:</span>
|
||||
</span><span id="line-21"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||||
</span><span id="line-22">
|
||||
</span><span id="line-23"><span class="w"> </span><span class="nt">reasoning-model</span><span class="p">:</span>
|
||||
</span><span id="line-24"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-25">
|
||||
</span><span id="line-26"><span class="w"> </span><span class="c1"># Aliases that can also participate in dynamic routing</span>
|
||||
</span><span id="line-27"><span class="w"> </span><span class="nt">creative-model</span><span class="p">:</span>
|
||||
</span><span id="line-28"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<p>This configuration allows clients to:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p><strong>Use direct model selection</strong>: <code class="docutils literal notranslate"><span class="pre">model="fast-model"</span></code></p></li>
|
||||
<li><p><strong>Let the router decide</strong>: No model specified, router analyzes content</p></li>
|
||||
</ol>
|
||||
</section>
|
||||
<section id="example-use-cases">
|
||||
<h2>Example Use Cases<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#example-use-cases" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#example-use-cases'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
|
|
@ -256,8 +492,8 @@ Our preference-aligned approach matches practical definitions of performance in
|
|||
<li><p><strong>Conversational Routing</strong>: Track conversation context to identify when topics shift between domains or when the type of assistance needed changes mid-conversation.</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="best-practice">
|
||||
<h2>Best practice<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#best-practice" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#best-practice'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<section id="best-practicesm">
|
||||
<h2>Best practices<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#best-practicesm" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#best-practicesm'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<ul class="simple">
|
||||
<li><p><strong>💡Consistent Naming:</strong> Route names should align with their descriptions.</p>
|
||||
<ul>
|
||||
|
|
@ -308,11 +544,20 @@ Our preference-aligned approach matches practical definitions of performance in
|
|||
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
|
||||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
<li><a :data-current="activeSection === '#routing-workflow'" class="reference internal" href="#routing-workflow">Routing Workflow</a></li>
|
||||
<li><a :data-current="activeSection === '#arch-router'" class="reference internal" href="#arch-router">Arch-Router</a></li>
|
||||
<li><a :data-current="activeSection === '#implementing-llm-routing'" class="reference internal" href="#implementing-llm-routing">Implementing LLM Routing</a></li>
|
||||
<li><a :data-current="activeSection === '#routing-methods'" class="reference internal" href="#routing-methods">Routing Methods</a><ul>
|
||||
<li><a :data-current="activeSection === '#model-based-routing'" class="reference internal" href="#model-based-routing">Model-based Routing</a></li>
|
||||
<li><a :data-current="activeSection === '#alias-based-routing'" class="reference internal" href="#alias-based-routing">Alias-based Routing</a></li>
|
||||
<li><a :data-current="activeSection === '#preference-aligned-routing-arch-router'" class="reference internal" href="#preference-aligned-routing-arch-router">Preference-aligned Routing (Arch-Router)</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a :data-current="activeSection === '#model-based-routing-workflow'" class="reference internal" href="#model-based-routing-workflow">Model-based Routing Workflow</a></li>
|
||||
<li><a :data-current="activeSection === '#alias-based-routing-workflow'" class="reference internal" href="#alias-based-routing-workflow">Alias-based Routing Workflow</a></li>
|
||||
<li><a :data-current="activeSection === '#preference-aligned-routing-workflow-arch-router'" class="reference internal" href="#preference-aligned-routing-workflow-arch-router">Preference-aligned Routing Workflow (Arch-Router)</a></li>
|
||||
<li><a :data-current="activeSection === '#id1'" class="reference internal" href="#id1">Arch-Router</a></li>
|
||||
<li><a :data-current="activeSection === '#implementing-routing'" class="reference internal" href="#implementing-routing">Implementing Routing</a></li>
|
||||
<li><a :data-current="activeSection === '#combining-routing-methods'" class="reference internal" href="#combining-routing-methods">Combining Routing Methods</a></li>
|
||||
<li><a :data-current="activeSection === '#example-use-cases'" class="reference internal" href="#example-use-cases">Example Use Cases</a></li>
|
||||
<li><a :data-current="activeSection === '#best-practice'" class="reference internal" href="#best-practice">Best practice</a></li>
|
||||
<li><a :data-current="activeSection === '#best-practicesm'" class="reference internal" href="#best-practicesm">Best practices</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</aside>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -108,7 +108,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
14
index.html
14
index.html
|
|
@ -107,7 +107,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
@ -185,7 +190,12 @@ Concepts</label><div class="sd-tab-content docutils">
|
|||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_providers/llm_providers.html">LLM Providers</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
|
|
|||
BIN
objects.inv
BIN
objects.inv
Binary file not shown.
|
|
@ -107,7 +107,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/llm_providers/llm_providers.html">LLM Providers<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
|
|
@ -142,7 +142,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_providers/llm_providers.html">LLM Providers</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/llm_providers/supported_providers.html">Supported Providers & Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/llm_providers/client_libraries.html">Client Libraries</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/llm_providers/model_aliases.html">Model Aliases</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1,2 +1,2 @@
|
|||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>./docsbuild_with_arch/agent.html</loc></url><url><loc>./docsbuild_with_arch/multi_turn.html</loc></url><url><loc>./docsbuild_with_arch/rag.html</loc></url><url><loc>./docsconcepts/llm_provider.html</loc></url><url><loc>./docsconcepts/prompt_target.html</loc></url><url><loc>./docsconcepts/tech_overview/error_target.html</loc></url><url><loc>./docsconcepts/tech_overview/listener.html</loc></url><url><loc>./docsconcepts/tech_overview/model_serving.html</loc></url><url><loc>./docsconcepts/tech_overview/prompt.html</loc></url><url><loc>./docsconcepts/tech_overview/request_lifecycle.html</loc></url><url><loc>./docsconcepts/tech_overview/tech_overview.html</loc></url><url><loc>./docsconcepts/tech_overview/terminology.html</loc></url><url><loc>./docsconcepts/tech_overview/threading_model.html</loc></url><url><loc>./docsget_started/intro_to_arch.html</loc></url><url><loc>./docsget_started/overview.html</loc></url><url><loc>./docsget_started/quickstart.html</loc></url><url><loc>./docsguides/agent_routing.html</loc></url><url><loc>./docsguides/function_calling.html</loc></url><url><loc>./docsguides/llm_router.html</loc></url><url><loc>./docsguides/observability/access_logging.html</loc></url><url><loc>./docsguides/observability/monitoring.html</loc></url><url><loc>./docsguides/observability/observability.html</loc></url><url><loc>./docsguides/observability/tracing.html</loc></url><url><loc>./docsguides/prompt_guard.html</loc></url><url><loc>./docsindex.html</loc></url><url><loc>./docsresources/configuration_reference.html</loc></url><url><loc>./docssearch.html</loc></url></urlset>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>./docsbuild_with_arch/agent.html</loc></url><url><loc>./docsbuild_with_arch/multi_turn.html</loc></url><url><loc>./docsbuild_with_arch/rag.html</loc></url><url><loc>./docsconcepts/llm_providers/client_libraries.html</loc></url><url><loc>./docsconcepts/llm_providers/llm_providers.html</loc></url><url><loc>./docsconcepts/llm_providers/model_aliases.html</loc></url><url><loc>./docsconcepts/llm_providers/supported_providers.html</loc></url><url><loc>./docsconcepts/prompt_target.html</loc></url><url><loc>./docsconcepts/tech_overview/error_target.html</loc></url><url><loc>./docsconcepts/tech_overview/listener.html</loc></url><url><loc>./docsconcepts/tech_overview/model_serving.html</loc></url><url><loc>./docsconcepts/tech_overview/prompt.html</loc></url><url><loc>./docsconcepts/tech_overview/request_lifecycle.html</loc></url><url><loc>./docsconcepts/tech_overview/tech_overview.html</loc></url><url><loc>./docsconcepts/tech_overview/terminology.html</loc></url><url><loc>./docsconcepts/tech_overview/threading_model.html</loc></url><url><loc>./docsget_started/intro_to_arch.html</loc></url><url><loc>./docsget_started/overview.html</loc></url><url><loc>./docsget_started/quickstart.html</loc></url><url><loc>./docsguides/agent_routing.html</loc></url><url><loc>./docsguides/function_calling.html</loc></url><url><loc>./docsguides/llm_router.html</loc></url><url><loc>./docsguides/observability/access_logging.html</loc></url><url><loc>./docsguides/observability/monitoring.html</loc></url><url><loc>./docsguides/observability/observability.html</loc></url><url><loc>./docsguides/observability/tracing.html</loc></url><url><loc>./docsguides/prompt_guard.html</loc></url><url><loc>./docsindex.html</loc></url><url><loc>./docsresources/configuration_reference.html</loc></url><url><loc>./docssearch.html</loc></url></urlset>
|
||||
Loading…
Add table
Add a link
Reference in a new issue