mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
448 lines
No EOL
56 KiB
HTML
Executable file
448 lines
No EOL
56 KiB
HTML
Executable file
<!DOCTYPE html>
|
||
|
||
<html :class="{ 'dark' : darkMode === true }" data-content_root="../../" lang="en" x-data="{ darkMode: $persist(window.matchMedia('(prefers-color-scheme: dark)').matches), activeSection: ''}">
|
||
<head>
|
||
<script>
|
||
(function () {
|
||
// Set initial color scheme
|
||
if ((localStorage.getItem("_x_darkMode") === "true") || (window.matchMedia("(prefers-color-scheme: dark)").matches)) {
|
||
document.documentElement.classList.add("dark");
|
||
}
|
||
|
||
// Watch for media preference changes
|
||
window.matchMedia("(prefers-color-scheme: dark)").addEventListener("change", (event) => {
|
||
localStorage.setItem("_x_darkMode", event.matches);
|
||
document.documentElement.classList.toggle("dark", event.matches);
|
||
});
|
||
})();
|
||
</script>
|
||
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
|
||
<meta charset="utf-8"/>
|
||
<meta content="#ffffff" media="(prefers-color-scheme: light)" name="theme-color"/>
|
||
<meta content="#030711" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||
<meta content="width=device-width, initial-scale=1" name="viewport"/>
|
||
<title>Model Aliases | Plano Docs v0.4.8</title>
|
||
<meta content="Model Aliases | Plano Docs v0.4.8" property="og:title"/>
|
||
<meta content="Model Aliases | Plano Docs v0.4.8" name="twitter:title"/>
|
||
<link href="../../_static/pygments.css?v=73db4dac" rel="stylesheet" type="text/css"/>
|
||
<link href="../../_static/theme.css?v=73505e79" rel="stylesheet" type="text/css"/>
|
||
<link href="../../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
|
||
<link href="../../_static/css/custom.css?v=2929376a" rel="stylesheet" type="text/css"/>
|
||
<link href="../../_static/awesome-sphinx-design.css?v=c54898a4" rel="stylesheet" type="text/css"/>
|
||
<link href="./docs/concepts/llm_providers/model_aliases.html" rel="canonical"/>
|
||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||
<link href="../../search.html" rel="search" title="Search"/>
|
||
<link href="../prompt_target.html" rel="next" title="Prompt Target"/>
|
||
<link href="client_libraries.html" rel="prev" title="Client Libraries"/>
|
||
</head>
|
||
<body :class="{ 'overflow-hidden': showSidebar }" class="min-h-screen font-sans antialiased bg-background text-foreground" x-data="{ showSidebar: false, showScrollTop: false }">
|
||
<div @click.self="showSidebar = false" class="fixed inset-0 z-50 overflow-hidden bg-background/80 backdrop-blur-sm md:hidden" x-cloak="" x-show="showSidebar"></div><div class="relative flex flex-col min-h-screen" id="page"><a class="absolute top-0 left-0 z-[100] block bg-background p-4 text-xl transition -translate-x-full opacity-0 focus:translate-x-0 focus:opacity-100" href="#content">
|
||
Skip to content
|
||
</a><header class="sticky top-0 z-40 w-full border-b shadow-xs border-border bg-background/90 backdrop-blur"><div class="container flex items-center h-14">
|
||
<div class="hidden mr-4 md:flex">
|
||
<a class="flex items-center mr-6" href="../../index.html">
|
||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Plano Docs v0.4.8</span>
|
||
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
|
||
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
|
||
</svg>
|
||
<span class="sr-only">Toggle navigation menu</span>
|
||
</button>
|
||
<div class="flex items-center justify-between flex-1 gap-2 sm:gap-4 md:justify-end">
|
||
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../../search.html" class="relative flex items-center group" id="searchbox" method="get">
|
||
<input aria-label="Search the docs" class="inline-flex items-center font-medium transition-colors bg-transparent focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 ring-offset-background border border-input hover:bg-accent focus:bg-accent hover:text-accent-foreground focus:text-accent-foreground hover:placeholder-accent-foreground py-2 px-4 relative h-9 w-full justify-start rounded-[0.5rem] text-sm text-muted-foreground sm:pr-12 md:w-40 lg:w-64" id="search-input" name="q" placeholder="Search ..." type="search" x-ref="search"/>
|
||
<kbd class="pointer-events-none absolute right-1.5 top-2 hidden h-5 select-none text-muted-foreground items-center gap-1 rounded border border-border bg-muted px-1.5 font-mono text-[10px] font-medium opacity-100 sm:flex group-hover:bg-accent group-hover:text-accent-foreground">
|
||
<span class="text-xs">⌘</span>
|
||
K
|
||
</kbd>
|
||
</form>
|
||
</div>
|
||
<nav class="flex items-center gap-1">
|
||
<a href="https://github.com/katanemo/plano" rel="noopener nofollow" title="Visit repository on GitHub">
|
||
<div class="inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md hover:bg-accent hover:text-accent-foreground h-9 w-9">
|
||
<svg fill="currentColor" height="26px" style="margin-top:-2px;display:inline" viewbox="0 0 45 44" xmlns="http://www.w3.org/2000/svg"><path clip-rule="evenodd" d="M22.477.927C10.485.927.76 10.65.76 22.647c0 9.596 6.223 17.736 14.853 20.608 1.087.2 1.483-.47 1.483-1.047 0-.516-.019-1.881-.03-3.693-6.04 1.312-7.315-2.912-7.315-2.912-.988-2.51-2.412-3.178-2.412-3.178-1.972-1.346.149-1.32.149-1.32 2.18.154 3.327 2.24 3.327 2.24 1.937 3.318 5.084 2.36 6.321 1.803.197-1.403.759-2.36 1.379-2.903-4.823-.548-9.894-2.412-9.894-10.734 0-2.37.847-4.31 2.236-5.828-.224-.55-.969-2.759.214-5.748 0 0 1.822-.584 5.972 2.226 1.732-.482 3.59-.722 5.437-.732 1.845.01 3.703.25 5.437.732 4.147-2.81 5.967-2.226 5.967-2.226 1.185 2.99.44 5.198.217 5.748 1.392 1.517 2.232 3.457 2.232 5.828 0 8.344-5.078 10.18-9.916 10.717.779.67 1.474 1.996 1.474 4.021 0 2.904-.027 5.247-.027 5.96 0 .58.392 1.256 1.493 1.044C37.981 40.375 44.2 32.24 44.2 22.647c0-11.996-9.726-21.72-21.722-21.72" fill="currentColor" fill-rule="evenodd"></path></svg>
|
||
</div>
|
||
</a>
|
||
<button @click="darkMode = !darkMode" class="relative inline-flex items-center justify-center px-0 text-sm font-medium transition-colors rounded-md hover:bg-accent hover:text-accent-foreground h-9 w-9" title="Toggle color scheme" type="button">
|
||
<svg class="absolute transition-all scale-100 rotate-0 dark:-rotate-90 dark:scale-0" fill="currentColor" height="16" viewbox="0 96 960 960" width="16" xmlns="http://www.w3.org/2000/svg">
|
||
<path d="M480 685q45.456 0 77.228-31.772Q589 621.456 589 576q0-45.456-31.772-77.228Q525.456 467 480 467q-45.456 0-77.228 31.772Q371 530.544 371 576q0 45.456 31.772 77.228Q434.544 685 480 685Zm0 91q-83 0-141.5-58.5T280 576q0-83 58.5-141.5T480 376q83 0 141.5 58.5T680 576q0 83-58.5 141.5T480 776ZM80 621.5q-19.152 0-32.326-13.174T34.5 576q0-19.152 13.174-32.326T80 530.5h80q19.152 0 32.326 13.174T205.5 576q0 19.152-13.174 32.326T160 621.5H80Zm720 0q-19.152 0-32.326-13.174T754.5 576q0-19.152 13.174-32.326T800 530.5h80q19.152 0 32.326 13.174T925.5 576q0 19.152-13.174 32.326T880 621.5h-80Zm-320-320q-19.152 0-32.326-13.174T434.5 256v-80q0-19.152 13.174-32.326T480 130.5q19.152 0 32.326 13.174T525.5 176v80q0 19.152-13.174 32.326T480 301.5Zm0 720q-19.152 0-32.326-13.17Q434.5 995.152 434.5 976v-80q0-19.152 13.174-32.326T480 850.5q19.152 0 32.326 13.174T525.5 896v80q0 19.152-13.174 32.33-13.174 13.17-32.326 13.17ZM222.174 382.065l-43-42Q165.5 327.391 166 308.239t13.174-33.065q13.435-13.674 32.587-13.674t32.065 13.674l42.239 43q12.674 13.435 12.555 31.706-.12 18.272-12.555 31.946-12.674 13.674-31.445 13.413-18.772-.261-32.446-13.174Zm494 494.761-42.239-43q-12.674-13.435-12.674-32.087t12.674-31.565Q686.609 756.5 705.38 757q18.772.5 32.446 13.174l43 41.761Q794.5 824.609 794 843.761t-13.174 33.065Q767.391 890.5 748.239 890.5t-32.065-13.674Zm-42-494.761Q660.5 369.391 661 350.62q.5-18.772 13.174-32.446l41.761-43Q728.609 261.5 747.761 262t33.065 13.174q13.674 13.435 13.674 32.587t-13.674 32.065l-43 42.239q-13.435 12.674-31.706 12.555-18.272-.12-31.946-12.555Zm-495 494.761Q165.5 863.391 165.5 844.239t13.674-32.065l43-42.239q13.435-12.674 32.087-12.674t31.565 12.674Q299.5 782.609 299 801.38q-.5 18.772-13.174 32.446l-41.761 43Q231.391 890.5 212.239 890t-33.065-13.174ZM480 576Z"></path>
|
||
</svg>
|
||
<svg class="absolute transition-all scale-0 rotate-90 dark:rotate-0 dark:scale-100" fill="currentColor" height="16" viewbox="0 96 960 960" width="16" xmlns="http://www.w3.org/2000/svg">
|
||
<path d="M480 936q-151 0-255.5-104.5T120 576q0-138 90-239.5T440 218q25-3 39 18t-1 44q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q21-14 43-1.5t19 39.5q-14 138-117.5 229T480 936Zm0-80q88 0 158-48.5T740 681q-20 5-40 8t-40 3q-123 0-209.5-86.5T364 396q0-20 3-40t8-40q-78 32-126.5 102T200 576q0 116 82 198t198 82Zm-10-270Z"></path>
|
||
</svg>
|
||
</button>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</header>
|
||
<div class="flex-1"><div class="container md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
|
||
<a class="justify-start text-sm md:!hidden bg-background" href="../../index.html">
|
||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Plano Docs v0.4.8</span>
|
||
</a>
|
||
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0">
|
||
<div class="overflow-y-auto h-full w-full relative pr-6">
|
||
|
||
<script async="" src="https://www.googletagmanager.com/gtag/js?id=G-EH2VW19FXE"></script>
|
||
<script>
|
||
window.dataLayer = window.dataLayer || [];
|
||
function gtag(){dataLayer.push(arguments);}
|
||
gtag('js', new Date());
|
||
|
||
gtag('config', 'G-EH2VW19FXE');
|
||
</script>
|
||
<nav class="table w-full min-w-full my-6 lg:my-8">
|
||
<p class="caption" role="heading"><span class="caption-text">Get Started</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/overview.html">Overview</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/intro_to_plano.html">Intro to Plano</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html">Quickstart</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html#next-steps">Next Steps</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Concepts</span></p>
|
||
<ul class="current">
|
||
<li class="toctree-l1"><a class="reference internal" href="../listeners.html">Listeners</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../agents.html">Agents</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../filter_chain.html">Filter Chains</a></li>
|
||
<li class="toctree-l1 current" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="llm_providers.html">Model (LLM) Providers<button @click.prevent.stop="expanded = !expanded" type="button" x-cloak=""><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul class="current" x-show="expanded">
|
||
<li class="toctree-l2"><a class="reference internal" href="supported_providers.html">Supported Providers & Configuration</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="client_libraries.html">Client Libraries</a></li>
|
||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Model Aliases</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../signals.html">Signals™</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../guides/orchestration.html">Orchestration</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../guides/llm_router.html">LLM Routing</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../guides/function_calling.html">Function Calling</a></li>
|
||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../guides/observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button" x-cloak=""><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-cloak="" x-show="expanded">
|
||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/tracing.html">Tracing</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/monitoring.html">Monitoring</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/access_logging.html">Access Logging</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../guides/prompt_guard.html">Guardrails</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../guides/state.html">Conversational State</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||
<ul>
|
||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../resources/tech_overview/tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button" x-cloak=""><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-cloak="" x-show="expanded">
|
||
<li class="toctree-l2"><a class="reference internal" href="../../resources/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../resources/tech_overview/model_serving.html">Bright Staff</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../resources/tech_overview/threading_model.html">Threading Model</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../resources/deployment.html">Deployment</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../resources/cli_reference.html">CLI Reference</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../resources/llms_txt.html">llms.txt</a></li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
<button @click="showSidebar = false" class="absolute md:hidden right-4 top-4 rounded-sm opacity-70 transition-opacity hover:opacity-100" type="button">
|
||
<svg class="h-4 w-4" fill="currentColor" height="24" stroke="none" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||
<path d="M480 632 284 828q-11 11-28 11t-28-11q-11-11-11-28t11-28l196-196-196-196q-11-11-11-28t11-28q11-11 28-11t28 11l196 196 196-196q11-11 28-11t28 11q11 11 11 28t-11 28L536 576l196 196q11 11 11 28t-11 28q-11 11-28 11t-28-11L480 632Z"></path>
|
||
</svg>
|
||
</button>
|
||
</aside>
|
||
<main class="relative py-6 lg:gap-10 lg:py-8 xl:grid xl:grid-cols-[1fr_300px]">
|
||
<div class="w-full min-w-0 mx-auto">
|
||
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
|
||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../../index.html">
|
||
<span class="hidden md:inline">Plano Docs v0.4.8</span>
|
||
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
|
||
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
|
||
</svg>
|
||
</a>
|
||
<div class="mr-1">/</div><a class="hover:text-foreground overflow-hidden text-ellipsis whitespace-nowrap" href="llm_providers.html">Model (LLM) Providers</a>
|
||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Model Aliases</span>
|
||
</nav>
|
||
<div id="content" role="main">
|
||
<section id="model-aliases">
|
||
<span id="id1"></span><h1>Model Aliases<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-aliases"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||
<p>Model aliases provide semantic, version-controlled names for your models, enabling cleaner client code, easier model management, and advanced routing capabilities. Instead of using provider-specific model names like <code class="docutils literal notranslate"><span class="pre">gpt-4o-mini</span></code> or <code class="docutils literal notranslate"><span class="pre">claude-3-5-sonnet-20241022</span></code>, you can create meaningful aliases like <code class="docutils literal notranslate"><span class="pre">fast-model</span></code> or <code class="docutils literal notranslate"><span class="pre">arch.summarize.v1</span></code>.</p>
|
||
<p><strong>Benefits of Model Aliases:</strong></p>
|
||
<ul class="simple">
|
||
<li><p><strong>Semantic Naming</strong>: Use descriptive names that reflect the model’s purpose</p></li>
|
||
<li><p><strong>Version Control</strong>: Implement versioning schemes (e.g., <code class="docutils literal notranslate"><span class="pre">v1</span></code>, <code class="docutils literal notranslate"><span class="pre">v2</span></code>) for model upgrades</p></li>
|
||
<li><p><strong>Environment Management</strong>: Different aliases can point to different models across environments</p></li>
|
||
<li><p><strong>Client Simplification</strong>: Clients use consistent, meaningful names regardless of underlying provider</p></li>
|
||
<li><p><strong>Advanced Routing (Coming Soon)</strong>: Enable guardrails, fallbacks, and traffic splitting at the alias level</p></li>
|
||
</ul>
|
||
<section id="basic-configuration">
|
||
<h2>Basic Configuration<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#basic-configuration" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#basic-configuration'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p><strong>Simple Alias Mapping</strong></p>
|
||
<div class="literal-block-wrapper docutils container" id="id2">
|
||
<div class="code-block-caption"><span class="caption-text">Basic Model Aliases</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">llm_providers</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-4">
|
||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
|
||
</span><span id="line-6"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
</span><span id="line-7">
|
||
</span><span id="line-8"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-3-5-sonnet-20241022</span>
|
||
</span><span id="line-9"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||
</span><span id="line-10">
|
||
</span><span id="line-11"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">ollama/llama3.1</span>
|
||
</span><span id="line-12"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:11434</span>
|
||
</span><span id="line-13">
|
||
</span><span id="line-14"><span class="c1"># Define aliases that map to the models above</span>
|
||
</span><span id="line-15"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-16"><span class="w"> </span><span class="c1"># Semantic versioning approach</span>
|
||
</span><span id="line-17"><span class="w"> </span><span class="nt">arch.summarize.v1</span><span class="p">:</span>
|
||
</span><span id="line-18"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-19">
|
||
</span><span id="line-20"><span class="w"> </span><span class="nt">arch.reasoning.v1</span><span class="p">:</span>
|
||
</span><span id="line-21"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||
</span><span id="line-22">
|
||
</span><span id="line-23"><span class="w"> </span><span class="nt">arch.creative.v1</span><span class="p">:</span>
|
||
</span><span id="line-24"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
|
||
</span><span id="line-25">
|
||
</span><span id="line-26"><span class="w"> </span><span class="c1"># Functional aliases</span>
|
||
</span><span id="line-27"><span class="w"> </span><span class="nt">fast-model</span><span class="p">:</span>
|
||
</span><span id="line-28"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-29">
|
||
</span><span id="line-30"><span class="w"> </span><span class="nt">smart-model</span><span class="p">:</span>
|
||
</span><span id="line-31"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||
</span><span id="line-32">
|
||
</span><span id="line-33"><span class="w"> </span><span class="nt">creative-model</span><span class="p">:</span>
|
||
</span><span id="line-34"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
|
||
</span><span id="line-35">
|
||
</span><span id="line-36"><span class="w"> </span><span class="c1"># Local model alias</span>
|
||
</span><span id="line-37"><span class="w"> </span><span class="nt">local-chat</span><span class="p">:</span>
|
||
</span><span id="line-38"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">llama3.1</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="using-aliases">
|
||
<h2>Using Aliases<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#using-aliases" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#using-aliases'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p><strong>Client Code Examples</strong></p>
|
||
<p>Once aliases are configured, clients can use semantic names instead of provider-specific model names:</p>
|
||
<div class="literal-block-wrapper docutils container" id="id3">
|
||
<div class="code-block-caption"><span class="caption-text">Python Client Usage</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">from</span><span class="w"> </span><span class="nn">openai</span><span class="w"> </span><span class="kn">import</span> <span class="n">OpenAI</span>
|
||
</span><span id="line-2">
|
||
</span><span id="line-3"><span class="n">client</span> <span class="o">=</span> <span class="n">OpenAI</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:12000/"</span><span class="p">)</span>
|
||
</span><span id="line-4">
|
||
</span><span id="line-5"><span class="c1"># Use semantic alias instead of provider model name</span>
|
||
</span><span id="line-6"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||
</span><span id="line-7"> <span class="n">model</span><span class="o">=</span><span class="s2">"arch.summarize.v1"</span><span class="p">,</span> <span class="c1"># Points to gpt-4o-mini</span>
|
||
</span><span id="line-8"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Summarize this document..."</span><span class="p">}]</span>
|
||
</span><span id="line-9"><span class="p">)</span>
|
||
</span><span id="line-10">
|
||
</span><span id="line-11"><span class="c1"># Switch to a different capability</span>
|
||
</span><span id="line-12"><span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">chat</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||
</span><span id="line-13"> <span class="n">model</span><span class="o">=</span><span class="s2">"arch.reasoning.v1"</span><span class="p">,</span> <span class="c1"># Points to gpt-4o</span>
|
||
</span><span id="line-14"> <span class="n">messages</span><span class="o">=</span><span class="p">[{</span><span class="s2">"role"</span><span class="p">:</span> <span class="s2">"user"</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="s2">"Solve this complex problem..."</span><span class="p">}]</span>
|
||
</span><span id="line-15"><span class="p">)</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="literal-block-wrapper docutils container" id="id4">
|
||
<div class="code-block-caption"><span class="caption-text">cURL Example</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><code><span id="line-1">curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://127.0.0.1:12000/v1/chat/completions<span class="w"> </span><span class="se">\</span>
|
||
</span><span id="line-2"><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||
</span><span id="line-3"><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||
</span><span id="line-4"><span class="s1"> "model": "fast-model",</span>
|
||
</span><span id="line-5"><span class="s1"> "messages": [{"role": "user", "content": "Hello!"}]</span>
|
||
</span><span id="line-6"><span class="s1"> }'</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="naming-best-practices">
|
||
<h2>Naming Best Practices<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#naming-best-practices" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#naming-best-practices'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p><strong>Semantic Versioning</strong></p>
|
||
<p>Use version numbers for backward compatibility and gradual model upgrades:</p>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Current production version</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">arch.summarize.v1</span><span class="p">:</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-5">
|
||
</span><span id="line-6"><span class="w"> </span><span class="c1"># Beta version for testing</span>
|
||
</span><span id="line-7"><span class="w"> </span><span class="nt">arch.summarize.v2</span><span class="p">:</span>
|
||
</span><span id="line-8"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||
</span><span id="line-9">
|
||
</span><span id="line-10"><span class="w"> </span><span class="c1"># Stable alias that always points to latest</span>
|
||
</span><span id="line-11"><span class="w"> </span><span class="nt">arch.summarize.latest</span><span class="p">:</span>
|
||
</span><span id="line-12"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
<p><strong>Purpose-Based Naming</strong></p>
|
||
<p>Create aliases that reflect the intended use case:</p>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Task-specific</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">code-reviewer</span><span class="p">:</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||
</span><span id="line-5">
|
||
</span><span id="line-6"><span class="w"> </span><span class="nt">document-summarizer</span><span class="p">:</span>
|
||
</span><span id="line-7"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-8">
|
||
</span><span id="line-9"><span class="w"> </span><span class="nt">creative-writer</span><span class="p">:</span>
|
||
</span><span id="line-10"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
|
||
</span><span id="line-11">
|
||
</span><span id="line-12"><span class="w"> </span><span class="nt">data-analyst</span><span class="p">:</span>
|
||
</span><span id="line-13"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
<p><strong>Environment-Specific Aliases</strong></p>
|
||
<p>Different environments can use different underlying models:</p>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="c1"># Development environment - use faster/cheaper models</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">dev.chat.v1</span><span class="p">:</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-5">
|
||
</span><span id="line-6"><span class="w"> </span><span class="c1"># Production environment - use more capable models</span>
|
||
</span><span id="line-7"><span class="w"> </span><span class="nt">prod.chat.v1</span><span class="p">:</span>
|
||
</span><span id="line-8"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||
</span><span id="line-9">
|
||
</span><span id="line-10"><span class="w"> </span><span class="c1"># Staging environment - test new models</span>
|
||
</span><span id="line-11"><span class="w"> </span><span class="nt">staging.chat.v1</span><span class="p">:</span>
|
||
</span><span id="line-12"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-5-sonnet-20241022</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="advanced-features-coming-soon">
|
||
<h2>Advanced Features (Coming Soon)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#advanced-features-coming-soon" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#advanced-features-coming-soon'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<p>The following features are planned for future releases of model aliases:</p>
|
||
<p><strong>Guardrails Integration</strong></p>
|
||
<p>Apply safety, cost, or latency rules at the alias level:</p>
|
||
<div class="literal-block-wrapper docutils container" id="id5">
|
||
<div class="code-block-caption"><span class="caption-text">Future Feature - Guardrails</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id5"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="nt">arch.reasoning.v1</span><span class="p">:</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-oss-120b</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">guardrails</span><span class="p">:</span>
|
||
</span><span id="line-5"><span class="w"> </span><span class="nt">max_latency</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">5s</span>
|
||
</span><span id="line-6"><span class="w"> </span><span class="nt">max_cost_per_request</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.10</span>
|
||
</span><span id="line-7"><span class="w"> </span><span class="nt">block_categories</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"jailbreak"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"PII"</span><span class="p p-Indicator">]</span>
|
||
</span><span id="line-8"><span class="w"> </span><span class="nt">content_filters</span><span class="p">:</span>
|
||
</span><span id="line-9"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"profanity"</span>
|
||
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"sensitive_data"</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
<p><strong>Fallback Chains</strong></p>
|
||
<p>Provide a chain of models if the primary target fails or hits quota limits:</p>
|
||
<div class="literal-block-wrapper docutils container" id="id6">
|
||
<div class="code-block-caption"><span class="caption-text">Future Feature - Fallbacks</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id6"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="nt">arch.summarize.v1</span><span class="p">:</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">fallbacks</span><span class="p">:</span>
|
||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">llama3.1</span>
|
||
</span><span id="line-6"><span class="w"> </span><span class="nt">conditions</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"quota_exceeded"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"timeout"</span><span class="p p-Indicator">]</span>
|
||
</span><span id="line-7"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">claude-3-haiku-20240307</span>
|
||
</span><span id="line-8"><span class="w"> </span><span class="nt">conditions</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"primary_and_first_fallback_failed"</span><span class="p p-Indicator">]</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
<p><strong>Traffic Splitting & Canary Deployments</strong></p>
|
||
<p>Distribute traffic across multiple models for A/B testing or gradual rollouts:</p>
|
||
<div class="literal-block-wrapper docutils container" id="id7">
|
||
<div class="code-block-caption"><span class="caption-text">Future Feature - Traffic Splitting</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id7"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="nt">arch.v1</span><span class="p">:</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">targets</span><span class="p">:</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">llama3.1</span>
|
||
</span><span id="line-5"><span class="w"> </span><span class="nt">weight</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">80</span>
|
||
</span><span id="line-6"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-7"><span class="w"> </span><span class="nt">weight</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">20</span>
|
||
</span><span id="line-8">
|
||
</span><span id="line-9"><span class="w"> </span><span class="c1"># Canary deployment</span>
|
||
</span><span id="line-10"><span class="w"> </span><span class="nt">arch.experimental.v1</span><span class="p">:</span>
|
||
</span><span id="line-11"><span class="w"> </span><span class="nt">targets</span><span class="p">:</span>
|
||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span><span class="w"> </span><span class="c1"># Current stable</span>
|
||
</span><span id="line-13"><span class="w"> </span><span class="nt">weight</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">95</span>
|
||
</span><span id="line-14"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">o1-preview</span><span class="w"> </span><span class="c1"># New model being tested</span>
|
||
</span><span id="line-15"><span class="w"> </span><span class="nt">weight</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">5</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
<p><strong>Load Balancing</strong></p>
|
||
<p>Distribute requests across multiple instances of the same model:</p>
|
||
<div class="literal-block-wrapper docutils container" id="id8">
|
||
<div class="code-block-caption"><span class="caption-text">Future Feature - Load Balancing</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id8"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">model_aliases</span><span class="p">:</span>
|
||
</span><span id="line-2"><span class="w"> </span><span class="nt">high-throughput-chat</span><span class="p">:</span>
|
||
</span><span id="line-3"><span class="w"> </span><span class="nt">load_balance</span><span class="p">:</span>
|
||
</span><span id="line-4"><span class="w"> </span><span class="nt">algorithm</span><span class="p">:</span><span class="w"> </span><span class="s">"round_robin"</span><span class="w"> </span><span class="c1"># or "least_connections", "weighted"</span>
|
||
</span><span id="line-5"><span class="w"> </span><span class="nt">targets</span><span class="p">:</span>
|
||
</span><span id="line-6"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-7"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"https://api-1.example.com"</span>
|
||
</span><span id="line-8"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-9"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"https://api-2.example.com"</span>
|
||
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
|
||
</span><span id="line-11"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"https://api-3.example.com"</span>
|
||
</span></code></pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="validation-rules">
|
||
<h2>Validation Rules<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#validation-rules" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#validation-rules'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<ul class="simple">
|
||
<li><p>Alias names must be valid identifiers (alphanumeric, dots, hyphens, underscores)</p></li>
|
||
<li><p>Target models must be defined in the <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> section</p></li>
|
||
<li><p>Circular references between aliases are not allowed</p></li>
|
||
<li><p>Weights in traffic splitting must sum to 100</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="see-also">
|
||
<h2>See Also<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#see-also" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#see-also'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||
<ul class="simple">
|
||
<li><p><a class="reference internal" href="llm_providers.html#llm-providers"><span class="std std-ref">Model (LLM) Providers</span></a> - Learn about configuring LLM providers</p></li>
|
||
<li><p><a class="reference internal" href="../../guides/llm_router.html#llm-router"><span class="std std-ref">LLM Routing</span></a> - Understand how aliases work with intelligent routing</p></li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||
<div class="mr-auto">
|
||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="client_libraries.html">
|
||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||
<polyline points="15 18 9 12 15 6"></polyline>
|
||
</svg>
|
||
Client Libraries
|
||
</a>
|
||
</div>
|
||
<div class="ml-auto">
|
||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="../prompt_target.html">
|
||
Prompt Target
|
||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||
<polyline points="9 18 15 12 9 6"></polyline>
|
||
</svg>
|
||
</a>
|
||
</div>
|
||
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
|
||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] h-full overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||
<ul>
|
||
<li><a :data-current="activeSection === '#basic-configuration'" class="reference internal" href="#basic-configuration">Basic Configuration</a></li>
|
||
<li><a :data-current="activeSection === '#using-aliases'" class="reference internal" href="#using-aliases">Using Aliases</a></li>
|
||
<li><a :data-current="activeSection === '#naming-best-practices'" class="reference internal" href="#naming-best-practices">Naming Best Practices</a></li>
|
||
<li><a :data-current="activeSection === '#advanced-features-coming-soon'" class="reference internal" href="#advanced-features-coming-soon">Advanced Features (Coming Soon)</a></li>
|
||
<li><a :data-current="activeSection === '#validation-rules'" class="reference internal" href="#validation-rules">Validation Rules</a></li>
|
||
<li><a :data-current="activeSection === '#see-also'" class="reference internal" href="#see-also">See Also</a></li>
|
||
</ul>
|
||
</div>
|
||
</aside>
|
||
</main>
|
||
</div>
|
||
</div><footer class="py-6 border-t border-border md:py-0">
|
||
<div class="container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row">
|
||
<div class="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
|
||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2025, Katanemo Labs, Inc Last updated: Feb 25, 2026. </p>
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
</div>
|
||
<script src="../../_static/documentation_options.js?v=f516ef65"></script>
|
||
<script src="../../_static/doctools.js?v=9bcbadda"></script>
|
||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
<script defer="defer" src="../../_static/theme.js?v=582b20c5"></script>
|
||
<script src="../../_static/design-tabs.js?v=f930bc37"></script>
|
||
</body>
|
||
</html> |