mirror of
https://github.com/katanemo/plano.git
synced 2026-04-26 09:16:24 +02:00
deploy: b30ad791f7
This commit is contained in:
parent
f4b686c7fc
commit
3e881c6eec
28 changed files with 819 additions and 820 deletions
|
|
@ -19,7 +19,7 @@
|
|||
<link href="../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../search.html" rel="search" title="Search"/>
|
||||
<link href="prompt_target.html" rel="next" title="Prompt Target"/>
|
||||
<link href="tech_overview/request_lifecycle.html" rel="prev" title="Request Lifecycle"/>
|
||||
<link href="tech_overview/error_target.html" rel="prev" title="Error Target"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="#">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -153,7 +153,7 @@
|
|||
<div id="content" role="main">
|
||||
<section id="llm-provider">
|
||||
<span id="id1"></span><h1>LLM Provider<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#llm-provider"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">llm_provider</span></code> is a top-level primitive in Arch, helping developers centrally define, secure, observe,
|
||||
<p><strong>LLM provider</strong> is a top-level primitive in Arch, helping developers centrally define, secure, observe,
|
||||
and manage the usage of of their LLMs. Arch builds on Envoy’s reliable <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/upstream/cluster_manager" rel="nofollow noopener">cluster subsystem<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>
|
||||
to manage egress traffic to LLMs, which includes intelligent routing, retry and fail-over mechanisms,
|
||||
ensuring high availability and fault tolerance. This abstraction also enables developers to seamlessly
|
||||
|
|
@ -162,7 +162,7 @@ across applications.</p>
|
|||
<p>Below is an example of how you can configure <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> with an instance of an Arch gateway.</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
|
|
@ -172,16 +172,15 @@ across applications.</p>
|
|||
</span><span id="line-8"><span class="linenos"> 8</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><mark><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</mark></span><span id="line-11"><mark><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</mark></span><span id="line-12"><mark><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</mark></span><span id="line-13"><mark><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</mark></span><span id="line-11"><mark><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</mark></span><span id="line-12"><mark><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</mark></span><span id="line-13"><mark><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</mark></span><span id="line-14"><mark><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</mark></span><span id="line-15"><mark><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</mark></span><span id="line-16"><mark><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</mark></span><span id="line-17"><span class="linenos">17</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -219,11 +218,11 @@ make outbound LLM calls.</p>
|
|||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="tech_overview/request_lifecycle.html">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="tech_overview/error_target.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Request Lifecycle
|
||||
Error Target
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue