mirror of
https://github.com/katanemo/plano.git
synced 2026-05-02 04:12:56 +02:00
541 lines
No EOL
34 KiB
HTML
541 lines
No EOL
34 KiB
HTML
|
||
<!DOCTYPE html>
|
||
|
||
|
||
<html lang="en" data-content_root="./" >
|
||
|
||
<head>
|
||
<meta charset="utf-8" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
|
||
<title>Configuration Reference — Arch 0.1-beta documentation</title>
|
||
|
||
|
||
|
||
<script data-cfasync="false">
|
||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||
</script>
|
||
|
||
<!-- Loaded before other Sphinx assets -->
|
||
<link href="_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
<link href="_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
<link href="_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
|
||
|
||
<link href="_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||
|
||
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=a746c00c" />
|
||
<link rel="stylesheet" type="text/css" href="_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||
<link rel="stylesheet" type="text/css" href="_static/copybutton.css?v=76b2166b" />
|
||
|
||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||
<link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||
<script src="_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||
|
||
<script src="_static/documentation_options.js?v=2742c0eb"></script>
|
||
<script src="_static/doctools.js?v=9a2dae69"></script>
|
||
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
<script src="_static/clipboard.min.js?v=a7894cd8"></script>
|
||
<script src="_static/copybutton.js?v=f281be69"></script>
|
||
<script src="_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||
<script>DOCUMENTATION_OPTIONS.pagename = 'configuration_reference';</script>
|
||
<link rel="icon" href="_static/favicon.ico"/>
|
||
<link rel="index" title="Index" href="genindex.html" />
|
||
<link rel="search" title="Search" href="search.html" />
|
||
<link rel="prev" title="LLMs" href="llms/llms.html" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||
<meta name="docsearch:language" content="en"/>
|
||
</head>
|
||
|
||
|
||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||
|
||
|
||
|
||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||
|
||
<div id="pst-scroll-pixel-helper"></div>
|
||
|
||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||
|
||
|
||
<input type="checkbox"
|
||
class="sidebar-toggle"
|
||
id="pst-primary-sidebar-checkbox"/>
|
||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||
|
||
<input type="checkbox"
|
||
class="sidebar-toggle"
|
||
id="pst-secondary-sidebar-checkbox"/>
|
||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||
|
||
<div class="search-button__wrapper">
|
||
<div class="search-button__overlay"></div>
|
||
<div class="search-button__search-container">
|
||
<form class="bd-search d-flex align-items-center"
|
||
action="search.html"
|
||
method="get">
|
||
<i class="fa-solid fa-magnifying-glass"></i>
|
||
<input type="search"
|
||
class="form-control"
|
||
name="q"
|
||
id="search-input"
|
||
placeholder="Search..."
|
||
aria-label="Search..."
|
||
autocomplete="off"
|
||
autocorrect="off"
|
||
autocapitalize="off"
|
||
spellcheck="false"/>
|
||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||
</form></div>
|
||
</div>
|
||
|
||
<div class="pst-async-banner-revealer d-none">
|
||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||
</div>
|
||
|
||
|
||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||
</header>
|
||
|
||
|
||
<div class="bd-container">
|
||
<div class="bd-container__inner bd-page-width">
|
||
|
||
|
||
|
||
<div class="bd-sidebar-primary bd-sidebar">
|
||
|
||
|
||
|
||
<div class="sidebar-header-items sidebar-primary__section">
|
||
|
||
|
||
|
||
|
||
</div>
|
||
|
||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||
<div class="sidebar-primary-item">
|
||
|
||
|
||
|
||
|
||
|
||
<a class="navbar-brand logo" href="root.html">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<img src="_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||
<script>document.write(`<img src="_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||
|
||
|
||
</a></div>
|
||
<div class="sidebar-primary-item">
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="fa-solid fa-magnifying-glass"></i>
|
||
<span class="search-button__default-text">Search</span>
|
||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||
</button>
|
||
`);
|
||
</script></div>
|
||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||
<div class="bd-toc-item navbar-nav active">
|
||
<ul class="current nav bd-sidenav">
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="intro/intro.html">Introduction</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="intro/what_is_arch.html">What is Arch</a></li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="intro/architecture/architecture.html">Technical Architecture</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="intro/architecture/intro/terminology.html">Terminology</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="intro/architecture/intro/threading_model.html">Threading model</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="intro/architecture/listeners/listeners.html">Listener</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="intro/architecture/prompt_processing/prompt_processing.html">Prompts</a></li>
|
||
|
||
|
||
|
||
|
||
<li class="toctree-l3"><a class="reference internal" href="intro/architecture/listeners/llm_provider.html">LLM Provider</a></li>
|
||
|
||
<li class="toctree-l3"><a class="reference internal" href="intro/architecture/model_serving/model_serving.html">Model Serving</a></li>
|
||
</ul>
|
||
</details></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="intro/life_of_a_request.html">Life of a Request</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="intro/getting_help.html">Getting help</a></li>
|
||
</ul>
|
||
</details></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="getting_started/getting_started.html">Getting Started</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="observability/tracing.html">Tracing</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="observability/stats.html">Monitoring</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="observability/access_logs.html">Access Logging</a></li>
|
||
</ul>
|
||
</details></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="llms/llms.html">LLMs</a></li>
|
||
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Configuration Reference</a></li>
|
||
</ul>
|
||
|
||
</div>
|
||
</nav></div>
|
||
</div>
|
||
|
||
|
||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||
</div>
|
||
|
||
<div id="rtd-footer-container"></div>
|
||
|
||
|
||
</div>
|
||
|
||
<main id="main-content" class="bd-main" role="main">
|
||
|
||
|
||
|
||
<div class="sbt-scroll-pixel-helper"></div>
|
||
|
||
<div class="bd-content">
|
||
<div class="bd-article-container">
|
||
|
||
<div class="bd-header-article d-print-none">
|
||
<div class="header-article-items header-article__inner">
|
||
|
||
<div class="header-article-items__start">
|
||
|
||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<span class="fa-solid fa-bars"></span>
|
||
</button></div>
|
||
|
||
</div>
|
||
|
||
|
||
<div class="header-article-items__end">
|
||
|
||
<div class="header-article-item">
|
||
|
||
<div class="article-header-buttons">
|
||
|
||
|
||
|
||
|
||
|
||
<div class="dropdown dropdown-download-buttons">
|
||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||
<i class="fas fa-download"></i>
|
||
</button>
|
||
<ul class="dropdown-menu">
|
||
|
||
|
||
|
||
<li><a href="_sources/configuration_reference.rst" target="_blank"
|
||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||
title="Download source file"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-file"></i>
|
||
</span>
|
||
<span class="btn__text-container">.rst</span>
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
<li>
|
||
<button onclick="window.print()"
|
||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||
title="Print to PDF"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-file-pdf"></i>
|
||
</span>
|
||
<span class="btn__text-container">.pdf</span>
|
||
</button>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
<button onclick="toggleFullScreen()"
|
||
class="btn btn-sm btn-fullscreen-button"
|
||
title="Fullscreen mode"
|
||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-expand"></i>
|
||
</span>
|
||
|
||
</button>
|
||
|
||
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||
</button>
|
||
`);
|
||
</script>
|
||
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||
</button>
|
||
`);
|
||
</script>
|
||
|
||
</div></div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div id="jb-print-docs-body" class="onlyprint">
|
||
<h1>Configuration Reference</h1>
|
||
<!-- Table of contents -->
|
||
<div id="print-main-content">
|
||
<div id="jb-print-toc">
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div id="searchbox"></div>
|
||
<article class="bd-article">
|
||
|
||
<section id="configuration-reference">
|
||
<h1>Configuration Reference<a class="headerlink" href="#configuration-reference" title="Link to this heading">#</a></h1>
|
||
<p>The following is a complete reference of the <code class="docutils literal notranslate"><span class="pre">prompt-config.yml</span></code> that controls the behavior of a single instance of
|
||
the Arch gateway. We’ve kept things simple (less than 80 lines) and held off on exposing additional functionality (for
|
||
e.g. supporting push observability stats, managing prompt-endpoints as a virtual cluster, exposing more load balancing
|
||
options, etc). Our belief is that the simple things should be simple. We therefore offer good defaults for developers, so
|
||
that they can spend more of their time in building features unique to their AI experience.</p>
|
||
<div class="literal-block-wrapper docutils container" id="id1">
|
||
<div class="code-block-caption"><span class="caption-text"><a class="reference download internal" download="" href="_downloads/594e1d60c04207f4d07a934ce337cbce/prompt-config-full-reference.yml"><code class="xref download docutils literal notranslate"><span class="pre">prompt-config-full-reference-beta-1-0.yml</span></code></a></span><a class="headerlink" href="#id1" title="Link to this code">#</a></div>
|
||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||
<span class="linenos"> 2</span>
|
||
<span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
|
||
<span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||
<span class="linenos"> 5</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
|
||
<span class="linenos"> 6</span><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/plain Content-type in the http request</span>
|
||
<span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
|
||
<span class="linenos"> 8</span><span class="w"> </span><span class="nt">common_tls_context</span><span class="p">:</span><span class="w"> </span><span class="c1"># If you configure port 443, you'll need to update the listener with your TLS certificates</span>
|
||
<span class="linenos"> 9</span><span class="w"> </span><span class="nt">tls_certificates</span><span class="p">:</span>
|
||
<span class="linenos"> 10</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">certificate_chain</span><span class="p">:</span>
|
||
<span class="linenos"> 11</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="s">"/etc/certs/cert.pem"</span>
|
||
<span class="linenos"> 12</span><span class="w"> </span><span class="nt">private_key</span><span class="p">:</span>
|
||
<span class="linenos"> 13</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="s">"/etc/certs/key.pem"</span>
|
||
<span class="linenos"> 14</span>
|
||
<span class="linenos"> 15</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||
<span class="linenos"> 16</span><span class="nt">endpoints</span><span class="p">:</span>
|
||
<span class="linenos"> 17</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||
<span class="linenos"> 18</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||
<span class="linenos"> 19</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||
<span class="linenos"> 20</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||
<span class="linenos"> 21</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
|
||
<span class="linenos"> 22</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||
<span class="linenos"> 23</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||
<span class="linenos"> 24</span>
|
||
<span class="linenos"> 25</span><span class="w"> </span><span class="nt">mistral_local</span><span class="p">:</span>
|
||
<span class="linenos"> 26</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:8001"</span>
|
||
<span class="linenos"> 27</span>
|
||
<span class="linenos"> 28</span><span class="w"> </span><span class="nt">error_target</span><span class="p">:</span>
|
||
<span class="linenos"> 29</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"error_target_1"</span>
|
||
<span class="linenos"> 30</span>
|
||
<span class="linenos"> 31</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||
<span class="linenos"> 32</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||
<span class="linenos"> 33</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||
<span class="linenos"> 34</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||
<span class="linenos"> 35</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||
<span class="linenos"> 36</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||
<span class="linenos"> 37</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||
<span class="linenos"> 38</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||
<span class="linenos"> 39</span><span class="w"> </span><span class="nt">rate_limits</span><span class="p">:</span>
|
||
<span class="linenos"> 40</span><span class="w"> </span><span class="nt">selector</span><span class="p">:</span><span class="w"> </span><span class="c1">#optional headers, to add rate limiting based on http headers like JWT tokens or API keys</span>
|
||
<span class="linenos"> 41</span><span class="w"> </span><span class="nt">http_header</span><span class="p">:</span>
|
||
<span class="linenos"> 42</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Authorization"</span>
|
||
<span class="linenos"> 43</span><span class="w"> </span><span class="nt">value</span><span class="p">:</span><span class="w"> </span><span class="s">""</span><span class="w"> </span><span class="c1"># Empty value means each separate value has a separate limit</span>
|
||
<span class="linenos"> 44</span><span class="w"> </span><span class="nt">limit</span><span class="p">:</span>
|
||
<span class="linenos"> 45</span><span class="w"> </span><span class="nt">tokens</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100000</span><span class="w"> </span><span class="c1"># Tokens per unit</span>
|
||
<span class="linenos"> 46</span><span class="w"> </span><span class="nt">unit</span><span class="p">:</span><span class="w"> </span><span class="s">"minute"</span>
|
||
<span class="linenos"> 47</span>
|
||
<span class="linenos"> 48</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral8x7b"</span>
|
||
<span class="linenos"> 49</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral"</span>
|
||
<span class="linenos"> 50</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_API_KEY</span>
|
||
<span class="linenos"> 51</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral-8x7b"</span>
|
||
<span class="linenos"> 52</span>
|
||
<span class="linenos"> 53</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"MistralLocal7b"</span>
|
||
<span class="linenos"> 54</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"local"</span>
|
||
<span class="linenos"> 55</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral-7b-instruct"</span>
|
||
<span class="linenos"> 56</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral_local"</span>
|
||
<span class="linenos"> 57</span>
|
||
<span class="linenos"> 58</span><span class="c1"># provides a way to override default settings for the arch system</span>
|
||
<span class="linenos"> 59</span><span class="nt">overrides</span><span class="p">:</span>
|
||
<span class="linenos"> 60</span><span class="w"> </span><span class="c1"># By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.</span>
|
||
<span class="linenos"> 61</span><span class="w"> </span><span class="c1"># The intent matching threshold is kept at 0.80, you can override this behavior if you would like</span>
|
||
<span class="linenos"> 62</span><span class="w"> </span><span class="nt">prompt_target_intent_matching_threshold</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.60</span>
|
||
<span class="linenos"> 63</span>
|
||
<span class="linenos"> 64</span><span class="c1"># default system prompt used by all prompt targets</span>
|
||
<span class="linenos"> 65</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||
<span class="linenos"> 66</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||
<span class="linenos"> 67</span>
|
||
<span class="linenos"> 68</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||
<span class="linenos"> 69</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||
<span class="linenos"> 70</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||
<span class="linenos"> 71</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||
<span class="linenos"> 72</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="s">"Looks</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">you're</span><span class="nv"> </span><span class="s">curious</span><span class="nv"> </span><span class="s">about</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">abilities,</span><span class="nv"> </span><span class="s">but</span><span class="nv"> </span><span class="s">I</span><span class="nv"> </span><span class="s">can</span><span class="nv"> </span><span class="s">only</span><span class="nv"> </span><span class="s">provide</span><span class="nv"> </span><span class="s">assistance</span><span class="nv"> </span><span class="s">within</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">programmed</span><span class="nv"> </span><span class="s">parameters."</span>
|
||
<span class="linenos"> 73</span>
|
||
<span class="linenos"> 74</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||
<span class="linenos"> 75</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"reboot_network_device"</span>
|
||
<span class="linenos"> 76</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Helps</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">operators</span><span class="nv"> </span><span class="s">perform</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">operations</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">rebooting</span><span class="nv"> </span><span class="s">a</span><span class="nv"> </span><span class="s">device."</span>
|
||
<span class="linenos"> 77</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||
<span class="linenos"> 78</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||
<span class="linenos"> 79</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/action"</span>
|
||
<span class="linenos"> 80</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||
<span class="linenos"> 81</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_id"</span>
|
||
<span class="linenos"> 82</span><span class="w"> </span><span class="c1"># additional type options include: int | float | bool | string | list | dict</span>
|
||
<span class="linenos"> 83</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||
<span class="linenos"> 84</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Identifier</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||
<span class="linenos"> 85</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||
<span class="linenos"> 86</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"confirmation"</span>
|
||
<span class="linenos"> 87</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||
<span class="linenos"> 88</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Confirmation</span><span class="nv"> </span><span class="s">flag</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">proceed</span><span class="nv"> </span><span class="s">with</span><span class="nv"> </span><span class="s">reboot."</span>
|
||
<span class="linenos"> 89</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="s">"no"</span>
|
||
<span class="linenos"> 90</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">yes</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">no</span><span class="p p-Indicator">]</span>
|
||
<span class="linenos"> 91</span>
|
||
<span class="linenos"> 92</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"information_extraction"</span>
|
||
<span class="linenos"> 93</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||
<span class="linenos"> 94</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"This</span><span class="nv"> </span><span class="s">prompt</span><span class="nv"> </span><span class="s">handles</span><span class="nv"> </span><span class="s">all</span><span class="nv"> </span><span class="s">scenarios</span><span class="nv"> </span><span class="s">that</span><span class="nv"> </span><span class="s">are</span><span class="nv"> </span><span class="s">question</span><span class="nv"> </span><span class="s">and</span><span class="nv"> </span><span class="s">answer</span><span class="nv"> </span><span class="s">in</span><span class="nv"> </span><span class="s">nature.</span><span class="nv"> </span><span class="s">Like</span><span class="nv"> </span><span class="s">summarization,</span><span class="nv"> </span><span class="s">information</span><span class="nv"> </span><span class="s">extraction,</span><span class="nv"> </span><span class="s">etc."</span>
|
||
<span class="linenos"> 95</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||
<span class="linenos"> 96</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||
<span class="linenos"> 97</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/summary"</span>
|
||
<span class="linenos"> 98</span><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
|
||
<span class="linenos"> 99</span><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||
<span class="linenos">100</span><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
|
||
<span class="linenos">101</span><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||
<span class="linenos">102</span><span class="w"> </span><span class="no">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
|
||
<span class="linenos">103</span>
|
||
<span class="linenos">104</span><span class="nt">error_target</span><span class="p">:</span>
|
||
<span class="linenos">105</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||
<span class="linenos">106</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
|
||
<span class="linenos">107</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
|
||
<span class="linenos">108</span>
|
||
<span class="linenos">109</span><span class="nt">tracing</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100</span><span class="w"> </span><span class="c1">#sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.</span>
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
|
||
</article>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<footer class="prev-next-footer d-print-none">
|
||
|
||
<div class="prev-next-area">
|
||
<a class="left-prev"
|
||
href="llms/llms.html"
|
||
title="previous page">
|
||
<i class="fa-solid fa-angle-left"></i>
|
||
<div class="prev-next-info">
|
||
<p class="prev-next-subtitle">previous</p>
|
||
<p class="prev-next-title">LLMs</p>
|
||
</div>
|
||
</a>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
</div>
|
||
<footer class="bd-footer-content">
|
||
|
||
<div class="bd-footer-content__inner container">
|
||
|
||
<div class="footer-item">
|
||
|
||
<p class="component-author">
|
||
By Katanemo Labs, Inc
|
||
</p>
|
||
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
|
||
<p class="copyright">
|
||
|
||
© Copyright 2024, Katanemo Labs, Inc.
|
||
<br/>
|
||
|
||
</p>
|
||
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</footer>
|
||
|
||
|
||
</main>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||
<script src="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||
<script src="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||
|
||
<footer class="bd-footer">
|
||
</footer>
|
||
</body>
|
||
</html> |