mirror of
https://github.com/katanemo/plano.git
synced 2026-04-26 01:06:25 +02:00
deploy: e40b13be05
This commit is contained in:
parent
c62b95e38e
commit
2b5bfe09dc
8 changed files with 262 additions and 298 deletions
|
|
@ -218,20 +218,20 @@ enables scaling to very high core count CPUs.</p>
|
|||
<p>Today, we only support a static bootstrap configuration file for simplicity:</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="nt">listener</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/plain Content-type in the http request</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
|
||||
</span><span id="line-8">
|
||||
</span><span id="line-9"><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">provider_interface</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-3"><span class="nt">listeners</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">ingress_traffic</span><span class="p">:</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="nt">timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">30s</span>
|
||||
</span><span id="line-9">
|
||||
</span><span id="line-10"><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-11"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">provider_interface</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-17">
|
||||
</span><span id="line-18"><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-19"><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
|
|
@ -270,20 +270,15 @@ enables scaling to very high core count CPUs.</p>
|
|||
</span><span id="line-52"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</span><span id="line-53"><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">true</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">false</span><span class="p p-Indicator">]</span>
|
||||
</span><span id="line-54">
|
||||
</span><span id="line-55"><span class="nt">error_target</span><span class="p">:</span>
|
||||
</span><span id="line-56"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-57"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
|
||||
</span><span id="line-58"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
|
||||
</span><span id="line-59">
|
||||
</span><span id="line-60"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-61"><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-62"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-63"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-64"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-65"><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-66"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
|
||||
</span><span id="line-67"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-68"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span><span id="line-55"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-56"><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-57"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-58"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-59"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-60"><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-61"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
|
||||
</span><span id="line-62"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-63"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue