mirror of
https://github.com/katanemo/plano.git
synced 2026-05-10 16:22:42 +02:00
deploy: bc059aed4d
This commit is contained in:
parent
fb1cdee926
commit
0962b810d7
33 changed files with 228 additions and 93 deletions
|
|
@ -380,24 +380,22 @@ Provides a practical mechanism to encode user preferences through domain-action
|
|||
<p>This downloads the quantized GGUF model from HuggingFace and starts serving on <code class="docutils literal notranslate"><span class="pre">http://localhost:11434</span></code>.</p>
|
||||
</li>
|
||||
<li><p><strong>Configure Plano to use local Arch-Router</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">routing</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Arch-Router</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">llm_provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">arch-router</span>
|
||||
</span><span id="line-4">
|
||||
</span><span id="line-5"><span class="nt">model_providers</span><span class="p">:</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">arch-router</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://localhost:11434</span>
|
||||
</span><span id="line-9">
|
||||
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-5.2</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-13">
|
||||
</span><span id="line-14"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-sonnet-4-5</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-17"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative writing</span>
|
||||
</span><span id="line-18"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative content generation, storytelling, and writing assistance</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">overrides</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">llm_routing_model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M</span>
|
||||
</span><span id="line-3">
|
||||
</span><span id="line-4"><span class="nt">model_providers</span><span class="p">:</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://localhost:11434</span>
|
||||
</span><span id="line-7">
|
||||
</span><span id="line-8"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-5.2</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-11">
|
||||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-sonnet-4-5</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative writing</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative content generation, storytelling, and writing assistance</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</li>
|
||||
|
|
@ -444,24 +442,22 @@ Provides a practical mechanism to encode user preferences through domain-action
|
|||
</div>
|
||||
</li>
|
||||
<li><p><strong>Configure Plano to use the vLLM endpoint</strong></p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">routing</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Arch-Router</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">llm_provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">arch-router</span>
|
||||
</span><span id="line-4">
|
||||
</span><span id="line-5"><span class="nt">model_providers</span><span class="p">:</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">arch-router</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Arch-Router</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://<your-server-ip>:10000</span>
|
||||
</span><span id="line-9">
|
||||
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-5.2</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-13">
|
||||
</span><span id="line-14"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-sonnet-4-5</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-17"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative writing</span>
|
||||
</span><span id="line-18"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative content generation, storytelling, and writing assistance</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">overrides</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="nt">llm_routing_model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">plano/Arch-Router</span>
|
||||
</span><span id="line-3">
|
||||
</span><span id="line-4"><span class="nt">model_providers</span><span class="p">:</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">plano/Arch-Router</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://<your-server-ip>:10000</span>
|
||||
</span><span id="line-7">
|
||||
</span><span id="line-8"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-5.2</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-11">
|
||||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-sonnet-4-5</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">routing_preferences</span><span class="p">:</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative writing</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">creative content generation, storytelling, and writing assistance</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</li>
|
||||
|
|
@ -623,7 +619,7 @@ Provides a practical mechanism to encode user preferences through domain-action
|
|||
</div><footer class="py-6 border-t border-border md:py-0">
|
||||
<div class="container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row">
|
||||
<div class="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
|
||||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2025, Katanemo Labs, Inc Last updated: Mar 13, 2026. </p>
|
||||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2025, Katanemo Labs, Inc Last updated: Mar 15, 2026. </p>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue