mirror of
https://github.com/katanemo/plano.git
synced 2026-05-15 11:02:39 +02:00
deploy: c9b95c7c9f15441738c938bb27cca66eb86990e3
This commit is contained in:
parent
c901be3b22
commit
fd31269b72
205 changed files with 20640 additions and 1 deletions
465
intro/architecture/architecture.html
Normal file
465
intro/architecture/architecture.html
Normal file
|
|
@ -0,0 +1,465 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<!-- Responsive viewport: layout width follows the device width, with no initial zoom. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>Technical Architecture — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/architecture/architecture';</script>
|
||||
<link rel="icon" href="../../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
<link rel="next" title="Terminology" href="intro/terminology.html" />
|
||||
<link rel="prev" title="What is Arch" href="../what_is_arch.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 current active has-children"><a class="current reference internal" href="#">Technical Architecture</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="listeners/llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<!-- Icon-only control: aria-label supplies the accessible name (title is kept for the tooltip); the glyph itself is decorative. -->
<div class="header-article-item"><button type="button" class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" aria-label="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars" aria-hidden="true"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../../_sources/intro/architecture/architecture.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<button onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<button onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>Technical Architecture</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="technical-architecture">
|
||||
<h1>Technical Architecture<a class="headerlink" href="#technical-architecture" title="Link to this heading">#</a></h1>
|
||||
<div class="toctree-wrapper compound">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="listeners/listeners.html">Listener</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listeners/listeners.html#downstream-ingress">Downstream (Ingress)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listeners/listeners.html#upstream-egress">Upstream (Egress)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listeners/listeners.html#configure-listener">Configure Listener</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="prompt_processing/prompt_processing.html#messages">Messages</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="prompt_processing/prompt_processing.html#prompt-guardrails">Prompt Guardrails</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="prompt_processing/prompt_processing.html#prompt-targets">Prompt Targets</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt_processing/prompt_processing.html#intent-detection-and-prompt-matching">Intent Detection and Prompt Matching:</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt_processing/prompt_processing.html#agentic-apps-via-prompt-targets">Agentic Apps via Prompt Targets</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="prompt_processing/prompt_processing.html#prompting-llms">Prompting LLMs</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt_processing/prompt_processing.html#example-using-the-arch-python-sdk">Example: Using the Arch Python SDK</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt_processing/prompt_processing.html#example-using-openai-client-with-arch-as-an-egress-gateway">Example: Using OpenAI Client with Arch as an Egress Gateway</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="listeners/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="listeners/llm_provider.html#example-using-the-arch-python-sdk">Example: Using the Arch Python SDK</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="model_serving/model_serving.html">Model Serving</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving/model_serving.html#local-serving-cpu-moderate">Local Serving (CPU - Moderate)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving/model_serving.html#local-serving-gpu-fast">Local Serving (GPU- Fast)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving/model_serving.html#cloud-serving-gpu-blazing-fast">Cloud Serving (GPU - Blazing Fast)</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="../what_is_arch.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">What is Arch</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="intro/terminology.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">Terminology</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
464
intro/architecture/intro/terminology.html
Normal file
464
intro/architecture/intro/terminology.html
Normal file
|
|
@ -0,0 +1,464 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../../../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<!-- Responsive viewport: layout width follows the device width, with no initial zoom. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>Terminology — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../../../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../../../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/architecture/intro/terminology';</script>
|
||||
<link rel="icon" href="../../../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
<link rel="next" title="Threading model" href="threading_model.html" />
|
||||
<link rel="prev" title="Technical Architecture" href="../architecture.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../../../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../../../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../../../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../../../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../../intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 current active has-children"><a class="reference internal" href="../architecture.html">Technical Architecture</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l3 current active"><a class="current reference internal" href="#">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="../listeners/llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="../model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<!-- Icon-only control: aria-label supplies the accessible name (title is kept for the tooltip); the glyph itself is decorative. -->
<div class="header-article-item"><button type="button" class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" aria-label="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars" aria-hidden="true"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../../../_sources/intro/architecture/intro/terminology.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<button onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<button onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>Terminology</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="terminology">
|
||||
<span id="arch-terminology"></span><h1>Terminology<a class="headerlink" href="#terminology" title="Link to this heading">#</a></h1>
|
||||
<p>A few definitions before we dive into the main architecture documentation. Arch borrows from Envoy’s terminology
|
||||
to keep things consistent in logs, traces and in code.</p>
|
||||
<p><strong>Downstream(Ingress)</strong>: A downstream client (web application, etc.) connects to Arch, sends prompts, and receives responses.</p>
|
||||
<p><strong>Upstream(Egress)</strong>: An upstream host that receives connections and prompts from Arch, and returns context or responses for a prompt.</p>
|
||||
<a class="reference internal image-reference" href="../../../_images/network-topology-ingress-egress.jpg"><img alt="Network topology diagram showing ingress (downstream) and egress (upstream) traffic through Arch" class="align-center" src="../../../_images/network-topology-ingress-egress.jpg" style="width: 100%;" />
|
||||
</a>
|
||||
<p><strong>Listener</strong>: A listener is a named network location (e.g., port, address, path etc.) that Arch listens on to process prompts
|
||||
before forwarding them to your application server endpoints. Arch enables you to configure one listener for downstream connections
|
||||
(like port 80, 443) and creates a separate internal listener for calls that initiate from your application code to LLMs.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>When you start Arch, you specify a listener address/port that you want to bind downstream. But, Arch uses a predefined port
|
||||
that you can use (<code class="docutils literal notranslate"><span class="pre">127.0.0.1:10000</span></code>) to proxy egress calls originating from your application to LLMs (API-based or hosted).
|
||||
For more details, check out <a class="reference internal" href="../listeners/llm_provider.html#llm-providers"><span class="std std-ref">LLM providers</span></a>.</p>
|
||||
</div>
|
||||
<p><strong>Instance</strong>: An instance of the Arch gateway. When you start Arch it creates at most two processes. One to handle Layer 7
|
||||
networking operations (auth, TLS, observability, etc.) and the second process to serve models that enable it to make smart
|
||||
decisions on how to accept, handle and forward prompts. The second process is optional, as the model serving service could be
|
||||
hosted on a different network (an API call). But these two processes are considered a single instance of Arch.</p>
|
||||
<p><strong>Prompt Targets</strong>: Arch offers a primitive called <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> to help separate business logic from undifferentiated
|
||||
work in building generative AI apps. Prompt targets are endpoints that receive prompts that are processed by Arch.
|
||||
For example, Arch enriches incoming prompts with metadata like knowing when a request is a follow-up or clarifying prompt
|
||||
so that you can build faster, more accurate retrieval (RAG) apps. To support agentic apps, like scheduling travel plans or
|
||||
sharing comments on a document - via prompts, Arch uses its function calling abilities to extract critical information from
|
||||
the incoming prompt (or a set of prompts) needed by a downstream backend API or function call before calling it directly.</p>
|
||||
<p><strong>Error Targets</strong>: Error targets are those endpoints that receive forwarded errors from Arch when issues arise,
|
||||
such as failing to properly call a function/API, detecting violations of guardrails, or encountering other processing errors.
|
||||
These errors are communicated to the application via headers (X-Arch-[ERROR-TYPE]), allowing it to handle the errors gracefully
|
||||
and take appropriate actions.</p>
|
||||
<p><strong>Model Serving</strong>: Arch is a set of <strong>two</strong> self-contained processes that are designed to run alongside your application servers
|
||||
(or on a separate host connected via a network). The <strong>model serving</strong> process helps Arch make intelligent decisions about the
|
||||
incoming prompts. The model server is designed to call the (fast) purpose-built <a class="reference internal" href="../../../llms/llms.html#llms-in-arch"><span class="std std-ref">LLMs</span></a> in Arch.</p>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="../architecture.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">Technical Architecture</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="threading_model.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">Threading model</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
445
intro/architecture/intro/threading_model.html
Normal file
445
intro/architecture/intro/threading_model.html
Normal file
|
|
@ -0,0 +1,445 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../../../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>Threading model — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../../../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../../../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/architecture/intro/threading_model';</script>
|
||||
<link rel="icon" href="../../../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
<link rel="next" title="Listener" href="../listeners/listeners.html" />
|
||||
<link rel="prev" title="Terminology" href="terminology.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../../../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../../../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../../../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../../../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../../intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 current active has-children"><a class="reference internal" href="../architecture.html">Technical Architecture</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l3"><a class="reference internal" href="terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3 current active"><a class="current reference internal" href="#">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="../listeners/llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="../model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../../../_sources/intro/architecture/intro/threading_model.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<button onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<button onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>Threading model</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="threading-model">
|
||||
<span id="arch-overview-threading"></span><h1>Threading model<a class="headerlink" href="#threading-model" title="Link to this heading">#</a></h1>
|
||||
<p>Arch builds on top of Envoy’s single process with multiple threads architecture.</p>
|
||||
<p>A single <em>primary</em> thread controls various sporadic coordination tasks while some number of <em>worker</em>
|
||||
threads perform filtering and forwarding.</p>
|
||||
<p>Once a connection is accepted, the connection spends the rest of its lifetime bound to a single worker
|
||||
thread. All the functionality around prompt handling from a downstream client is handled in a separate worker thread.
|
||||
This allows the majority of Arch to be largely single threaded (embarrassingly parallel) with a small amount
|
||||
of more complex code handling coordination between the worker threads.</p>
|
||||
<p>Generally Arch is written to be 100% non-blocking.</p>
|
||||
<div class="admonition tip">
|
||||
<p class="admonition-title">Tip</p>
|
||||
<p>For most workloads we recommend configuring the number of worker threads to be equal to the number of
|
||||
hardware threads on the machine.</p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="terminology.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">Terminology</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="../listeners/listeners.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">Listener</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
508
intro/architecture/listeners/listeners.html
Normal file
508
intro/architecture/listeners/listeners.html
Normal file
|
|
@ -0,0 +1,508 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../../../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>Listener — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../../../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../../../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/architecture/listeners/listeners';</script>
|
||||
<link rel="icon" href="../../../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
<link rel="next" title="Prompts" href="../prompt_processing/prompt_processing.html" />
|
||||
<link rel="prev" title="Threading model" href="../intro/threading_model.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../../../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../../../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../../../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../../../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../../intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 current active has-children"><a class="reference internal" href="../architecture.html">Technical Architecture</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l3"><a class="reference internal" href="../intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3 current active"><a class="current reference internal" href="#">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="../model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../../../_sources/intro/architecture/listeners/listeners.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<button onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<button onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
<button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-list"></span>
|
||||
</button>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>Listener</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
<div>
|
||||
<h2> Contents </h2>
|
||||
</div>
|
||||
<nav aria-label="Page">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#downstream-ingress">Downstream (Ingress)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#upstream-egress">Upstream (Egress)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#configure-listener">Configure Listener</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="listener">
|
||||
<span id="arch-overview-listeners"></span><h1>Listener<a class="headerlink" href="#listener" title="Link to this heading">#</a></h1>
|
||||
<p>Listener is a top-level primitive in Arch, which simplifies the configuration required to bind incoming
|
||||
connections from downstream clients, and for egress connections to LLMs (hosted or API)</p>
|
||||
<p>Arch builds on Envoy’s Listener subsystem to streamline connection management for developers. Arch minimizes
|
||||
the complexity of Envoy’s listener setup by using best-practices and exposing only essential settings,
|
||||
making it easier for developers to bind connections without deep knowledge of Envoy’s configuration model. This
|
||||
simplification ensures that connections are secure, reliable, and optimized for performance.</p>
|
||||
<section id="downstream-ingress">
|
||||
<h2>Downstream (Ingress)<a class="headerlink" href="#downstream-ingress" title="Link to this heading">#</a></h2>
|
||||
<p>Developers can configure Arch to accept connections from downstream clients. A downstream listener acts as the
|
||||
primary entry point for incoming traffic, handling initial connection setup, including network filtering, guardrails,
|
||||
and additional network security checks. For more details on prompt security and safety,
|
||||
see <a class="reference internal" href="../prompt_processing/prompt_processing.html#arch-overview-prompt-handling"><span class="std std-ref">here</span></a></p>
|
||||
</section>
|
||||
<section id="upstream-egress">
|
||||
<h2>Upstream (Egress)<a class="headerlink" href="#upstream-egress" title="Link to this heading">#</a></h2>
|
||||
<p>Arch automatically configures a listener to route requests from your application to upstream LLM API providers (or hosts).
|
||||
When you start Arch, it creates a listener for egress traffic based on the presence of the <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> configuration
|
||||
section in the <code class="docutils literal notranslate"><span class="pre">prompt_config.yml</span></code> file. Arch binds itself to a local address such as <code class="docutils literal notranslate"><span class="pre">127.0.0.1:9000/v1</span></code> or a DNS-based
|
||||
address like <code class="docutils literal notranslate"><span class="pre">arch.local:9000/v1</span></code> for outgoing traffic. For more details on LLM providers, read <a class="reference internal" href="llm_provider.html#llm-providers"><span class="std std-ref">here</span></a></p>
|
||||
</section>
|
||||
<section id="configure-listener">
|
||||
<h2>Configure Listener<a class="headerlink" href="#configure-listener" title="Link to this heading">#</a></h2>
|
||||
<p>To configure a Downstream (Ingress) Listener, simply add the <code class="docutils literal notranslate"><span class="pre">listener</span></code> directive to your <code class="docutils literal notranslate"><span class="pre">prompt_config.yml</span></code> file:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id1">
|
||||
<div class="code-block-caption"><span class="caption-text"><a class="reference download internal" download="" href="../../../_downloads/a0b0970c8fa22d4fe861e8362965b77c/getting-started.yml"><code class="xref download docutils literal notranslate"><span class="pre">arch-getting-started.yml</span></code></a></span><a class="headerlink" href="#id1" title="Link to this code">#</a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<span class="hll"><span class="linenos"> 2</span><span class="nt">listener</span><span class="p">:</span>
|
||||
</span><span class="hll"><span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
||||
</span><span class="hll"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
||||
</span><span class="hll"><span class="linenos"> 5</span><span class="w"> </span><span class="nt">messages</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">tuple | hugging-face-messages-api</span>
|
||||
</span><span class="linenos"> 6</span>
|
||||
<span class="linenos"> 7</span><span class="nt">system_prompts</span><span class="p">:</span>
|
||||
<span class="linenos"> 8</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">network_assistant</span>
|
||||
<span class="linenos"> 9</span><span class="w"> </span><span class="nt">content</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts about the operational health of the network</span>
|
||||
<span class="linenos">10</span>
|
||||
<span class="linenos">11</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
<span class="linenos">12</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
<span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPEN_AI_KEY</span>
|
||||
<span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
<span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
<span class="linenos">16</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral"</span>
|
||||
<span class="linenos">17</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_KEY</span>
|
||||
<span class="linenos">18</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mixtral8-7B</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="../intro/threading_model.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">Threading model</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="../prompt_processing/prompt_processing.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">Prompts</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||||
|
||||
|
||||
<div class="sidebar-secondary-item">
|
||||
<div class="page-toc tocsection onthispage">
|
||||
<i class="fa-solid fa-list"></i> Contents
|
||||
</div>
|
||||
<nav class="bd-toc-nav page-toc">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#downstream-ingress">Downstream (Ingress)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#upstream-egress">Upstream (Egress)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#configure-listener">Configure Listener</a></li>
|
||||
</ul>
|
||||
</nav></div>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
523
intro/architecture/listeners/llm_provider.html
Normal file
523
intro/architecture/listeners/llm_provider.html
Normal file
|
|
@ -0,0 +1,523 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../../../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>LLM Provider — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../../../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../../../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/architecture/listeners/llm_provider';</script>
|
||||
<link rel="icon" href="../../../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
<link rel="next" title="Model Serving" href="../model_serving/model_serving.html" />
|
||||
<link rel="prev" title="Prompts" href="../prompt_processing/prompt_processing.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../../../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../../../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../../../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../../../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../../intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 current active has-children"><a class="reference internal" href="../architecture.html">Technical Architecture</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l3"><a class="reference internal" href="../intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3 current active"><a class="current reference internal" href="#">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="../model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../../../_sources/intro/architecture/listeners/llm_provider.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<button onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<button onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
<button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-list"></span>
|
||||
</button>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>LLM Provider</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
<div>
|
||||
<h2> Contents </h2>
|
||||
</div>
|
||||
<nav aria-label="Page">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LLM Provider</a></li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-the-arch-python-sdk">Example: Using the Arch Python SDK</a></li>
|
||||
</ul>
|
||||
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="llm-provider">
|
||||
<span id="llm-providers"></span><h1>LLM Provider<a class="headerlink" href="#llm-provider" title="Link to this heading">#</a></h1>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">llm_provider</span></code> is a top-level primitive in Arch, helping developers centrally define, secure, observe,
|
||||
and manage the usage of their LLMs. Arch builds on Envoy’s reliable <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/upstream/cluster_manager">cluster subsystem</a>
|
||||
to manage egress traffic to LLMs, which includes intelligent routing, retry and fail-over mechanisms,
|
||||
ensuring high availability and fault tolerance. This abstraction also enables developers to seamlessly switch between LLM providers or upgrade LLM versions, simplifying the integration and scaling of LLMs across
|
||||
applications.</p>
|
||||
<p>Below is an example of how you can configure <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> with an instance of an Arch gateway.</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id1">
|
||||
<div class="code-block-caption"><span class="caption-text"><a class="reference download internal" download="" href="../../../_downloads/a0b0970c8fa22d4fe861e8362965b77c/getting-started.yml"><code class="xref download docutils literal notranslate"><span class="pre">arch-getting-started.yml</span></code></a></span><a class="headerlink" href="#id1" title="Link to this code">#</a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<span class="linenos"> 2</span><span class="nt">listener</span><span class="p">:</span>
|
||||
<span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
||||
<span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
||||
<span class="linenos"> 5</span><span class="w"> </span><span class="nt">messages</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">tuple | hugging-face-messages-api</span>
|
||||
<span class="linenos"> 6</span>
|
||||
<span class="linenos"> 7</span><span class="nt">system_prompts</span><span class="p">:</span>
|
||||
<span class="linenos"> 8</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">network_assistant</span>
|
||||
<span class="linenos"> 9</span><span class="w"> </span><span class="nt">content</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts about the operational health of the network</span>
|
||||
<span class="linenos">10</span>
|
||||
<span class="hll"><span class="linenos">11</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span class="hll"><span class="linenos">12</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</span><span class="hll"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPEN_AI_KEY</span>
|
||||
</span><span class="hll"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span class="hll"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span class="hll"><span class="linenos">16</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral"</span>
|
||||
</span><span class="hll"><span class="linenos">17</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_KEY</span>
|
||||
</span><span class="hll"><span class="linenos">18</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mixtral8-7B</span>
|
||||
</span><span class="linenos">19</span>
|
||||
<span class="linenos">20</span><span class="nt">prompt_endpoints</span><span class="p">:</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>When you start Arch, it creates a listener port for egress traffic based on the presence of the <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code>
|
||||
configuration section in the <code class="docutils literal notranslate"><span class="pre">prompt_config.yml</span></code> file. Arch binds itself to a local address such as
|
||||
<code class="docutils literal notranslate"><span class="pre">127.0.0.1:9000/v1</span></code> or a DNS-based address like <code class="docutils literal notranslate"><span class="pre">arch.local:9000/v1</span></code> for egress traffic.</p>
|
||||
</div>
|
||||
<p>Arch also offers vendor-agnostic SDKs and libraries to make LLM calls to API-based LLM providers (like OpenAI,
|
||||
Anthropic, Mistral, Cohere, etc.) and supports calls to OSS LLMs that are hosted on your infrastructure. Arch
|
||||
abstracts the complexities of integrating with different LLM providers, providing a unified interface for making
|
||||
calls, handling retries, managing rate limits, and ensuring seamless integration with cloud-based and on-premise
|
||||
LLMs. Simply configure the details of the LLMs your application will use, and Arch offers a unified interface to
|
||||
make outbound LLM calls.</p>
|
||||
</section>
|
||||
<section id="example-using-the-arch-python-sdk">
|
||||
<h1>Example: Using the Arch Python SDK<a class="headerlink" href="#example-using-the-arch-python-sdk" title="Link to this heading">#</a></h1>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">arch_client</span> <span class="kn">import</span> <span class="n">ArchClient</span>
|
||||
|
||||
<span class="c1"># Initialize the Arch client</span>
|
||||
<span class="n">client</span> <span class="o">=</span> <span class="n">ArchClient</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:9000/v1"</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Define your LLM provider and prompt</span>
|
||||
<span class="n">llm_provider</span> <span class="o">=</span> <span class="s2">"openai"</span>
|
||||
<span class="n">prompt</span> <span class="o">=</span> <span class="s2">"What is the capital of France?"</span>
|
||||
|
||||
<span class="c1"># Send the prompt to the LLM through Arch</span>
|
||||
<span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">llm_provider</span><span class="o">=</span><span class="n">llm_provider</span><span class="p">,</span> <span class="n">prompt</span><span class="o">=</span><span class="n">prompt</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Print the response</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"LLM Response:"</span><span class="p">,</span> <span class="n">response</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="../prompt_processing/prompt_processing.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">Prompts</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="../model_serving/model_serving.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">Model Serving</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||||
|
||||
|
||||
<div class="sidebar-secondary-item">
|
||||
<div class="page-toc tocsection onthispage">
|
||||
<i class="fa-solid fa-list"></i> Contents
|
||||
</div>
|
||||
<nav class="bd-toc-nav page-toc">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LLM Provider</a></li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-the-arch-python-sdk">Example: Using the Arch Python SDK</a></li>
|
||||
</ul>
|
||||
|
||||
</nav></div>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
506
intro/architecture/model_serving/model_serving.html
Normal file
506
intro/architecture/model_serving/model_serving.html
Normal file
|
|
@ -0,0 +1,506 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../../../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>Model Serving — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../../../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../../../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/architecture/model_serving/model_serving';</script>
|
||||
<link rel="icon" href="../../../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
<link rel="next" title="Life of a Request" href="../../life_of_a_request.html" />
|
||||
<link rel="prev" title="LLM Provider" href="../listeners/llm_provider.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../../../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../../../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../../../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../../../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../../intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 current active has-children"><a class="reference internal" href="../architecture.html">Technical Architecture</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l3"><a class="reference internal" href="../intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="../listeners/llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3 current active"><a class="current reference internal" href="#">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../../../_sources/intro/architecture/model_serving/model_serving.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<button onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<button onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
<button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-list"></span>
|
||||
</button>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>Model Serving</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
<div>
|
||||
<h2> Contents </h2>
|
||||
</div>
|
||||
<nav aria-label="Page">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#local-serving-cpu-moderate">Local Serving (CPU - Moderate)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#local-serving-gpu-fast">Local Serving (GPU- Fast)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#cloud-serving-gpu-blazing-fast">Cloud Serving (GPU - Blazing Fast)</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="model-serving">
|
||||
<span id="arch-model-serving"></span><h1>Model Serving<a class="headerlink" href="#model-serving" title="Link to this heading">#</a></h1>
|
||||
<p>Arch is a set of <strong>two</strong> self-contained processes that are designed to run alongside your application
|
||||
servers (or on a separate host connected via a network). The first process is designated to manage low-level
|
||||
networking and HTTP-related concerns, and the other process is for <strong>model serving</strong>, which helps Arch make
|
||||
intelligent decisions about the incoming prompts. The model server is designed to call the purpose-built
|
||||
<a class="reference internal" href="../../../llms/llms.html#llms-in-arch"><span class="std std-ref">LLMs</span></a> in Arch.</p>
|
||||
<a class="reference internal image-reference" href="../../../_images/arch-system-architecture.jpg"><img alt="../../../_images/arch-system-architecture.jpg" class="align-center" src="../../../_images/arch-system-architecture.jpg" style="width: 50%;" />
|
||||
</a>
|
||||
<hr class="docutils" />
|
||||
<p>Arch is designed to be deployed in your cloud VPC, on an on-premises host, and can work on devices that don’t
|
||||
have a GPU. Note that GPU devices are needed for fast and cost-efficient use, so that Arch (model server, specifically)
|
||||
can process prompts quickly and forward control back to the application host. There are three modes in which Arch
|
||||
can be configured to run its <strong>model server</strong> subsystem:</p>
|
||||
<section id="local-serving-cpu-moderate">
|
||||
<h2>Local Serving (CPU - Moderate)<a class="headerlink" href="#local-serving-cpu-moderate" title="Link to this heading">#</a></h2>
|
||||
<p>The following bash commands enable you to configure the model server subsystem in Arch to run locally on device
|
||||
and only use CPU devices. This will be the slowest option but can be useful in dev/test scenarios where GPUs
|
||||
might not be available.</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>archgw<span class="w"> </span>up<span class="w"> </span>--local<span class="w"> </span>-cpu
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="local-serving-gpu-fast">
|
||||
<h2>Local Serving (GPU- Fast)<a class="headerlink" href="#local-serving-gpu-fast" title="Link to this heading">#</a></h2>
|
||||
<p>The following bash commands enable you to configure the model server subsystem in Arch to run locally on the
|
||||
machine and utilize the GPU available for fast inference across all model use cases, including function calling,
|
||||
guardrails, etc.</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>archgw<span class="w"> </span>up<span class="w"> </span>--local
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="cloud-serving-gpu-blazing-fast">
|
||||
<h2>Cloud Serving (GPU - Blazing Fast)<a class="headerlink" href="#cloud-serving-gpu-blazing-fast" title="Link to this heading">#</a></h2>
|
||||
<p>The command below instructs Arch to intelligently use GPUs locally for fast intent detection, but default to
|
||||
cloud serving for function calling and guardrails scenarios to dramatically improve the speed and overall performance
|
||||
of your applications.</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>archgw<span class="w"> </span>up
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>Arch’s model serving in the cloud is priced at $0.05M/token (156x cheaper than GPT-4o) with an average latency
|
||||
of 200ms (10x faster than GPT-4o). Please refer to our <a class="reference internal" href="../../../getting_started/getting_started.html#getting-started"><span class="std std-ref">getting started guide</span></a> to know
|
||||
how to generate API keys for model serving.</p>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="../listeners/llm_provider.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">LLM Provider</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="../../life_of_a_request.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">Life of a Request</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||||
|
||||
|
||||
<div class="sidebar-secondary-item">
|
||||
<div class="page-toc tocsection onthispage">
|
||||
<i class="fa-solid fa-list"></i> Contents
|
||||
</div>
|
||||
<nav class="bd-toc-nav page-toc">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#local-serving-cpu-moderate">Local Serving (CPU - Moderate)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#local-serving-gpu-fast">Local Serving (GPU- Fast)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#cloud-serving-gpu-blazing-fast">Cloud Serving (GPU - Blazing Fast)</a></li>
|
||||
</ul>
|
||||
</nav></div>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
707
intro/architecture/prompt_processing/prompt_processing.html
Normal file
707
intro/architecture/prompt_processing/prompt_processing.html
Normal file
|
|
@ -0,0 +1,707 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../../../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>Prompts — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../../../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../../../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/architecture/prompt_processing/prompt_processing';</script>
|
||||
<link rel="icon" href="../../../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
<link rel="next" title="LLM Provider" href="../listeners/llm_provider.html" />
|
||||
<link rel="prev" title="Listener" href="../listeners/listeners.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../../../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../../../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../../../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../../../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../../intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 current active has-children"><a class="reference internal" href="../architecture.html">Technical Architecture</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l3"><a class="reference internal" href="../intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="../listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3 current active"><a class="current reference internal" href="#">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="../listeners/llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="../model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../../../_sources/intro/architecture/prompt_processing/prompt_processing.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<button onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<button onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
<button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-list"></span>
|
||||
</button>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>Prompts</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
<div>
|
||||
<h2> Contents </h2>
|
||||
</div>
|
||||
<nav aria-label="Page">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Prompts</a></li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#messages">Messages</a></li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompt-guardrails">Prompt Guardrails</a></li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompt-targets">Prompt Targets</a><ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#intent-detection-and-prompt-matching">Intent Detection and Prompt Matching:</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#agentic-apps-via-prompt-targets">Agentic Apps via Prompt Targets</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompting-llms">Prompting LLMs</a><ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-the-arch-python-sdk">Example: Using the Arch Python SDK</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-openai-client-with-arch-as-an-egress-gateway">Example: Using OpenAI Client with Arch as an Egress Gateway</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="prompts">
|
||||
<span id="arch-overview-prompt-handling"></span><h1>Prompts<a class="headerlink" href="#prompts" title="Link to this heading">#</a></h1>
|
||||
<p>Arch’s primary design point is to securely accept, process and handle prompts. To do that effectively,
|
||||
Arch relies on Envoy’s HTTP <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/http/http_connection_management">connection management</a>
|
||||
subsystem and its <strong>prompt handler</strong> subsystem engineered with purpose-built <a class="reference internal" href="../../../llms/llms.html#llms-in-arch"><span class="std std-ref">LLMs</span></a> to
|
||||
implement critical functionality on behalf of developers so that you can stay focused on business logic.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>Arch’s <strong>prompt handler</strong> subsystem interacts with the <strong>model</strong> subsystem through Envoy’s cluster manager
|
||||
system to ensure a robust, resilient and fault-tolerant experience in managing incoming prompts. Read more
|
||||
about the <a class="reference internal" href="../model_serving/model_serving.html#arch-model-serving"><span class="std std-ref">model subsystem</span></a> and how the LLMs are hosted in Arch.</p>
|
||||
</div>
|
||||
</section>
|
||||
<section id="messages">
|
||||
<h1>Messages<a class="headerlink" href="#messages" title="Link to this heading">#</a></h1>
|
||||
<p>Arch accepts messages directly from the body of the HTTP request in a format that follows the <a class="reference external" href="https://huggingface.co/docs/text-generation-inference/en/messages_api">Hugging Face Messages API</a>.
|
||||
This design allows developers to pass a list of messages, where each message is represented as a dictionary
|
||||
containing two key-value pairs:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p><strong>Role</strong>: Defines the role of the message sender, such as “user” or “assistant”.</p></li>
|
||||
<li><p><strong>Content</strong>: Contains the actual text of the message.</p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
<section id="prompt-guardrails">
|
||||
<h1>Prompt Guardrails<a class="headerlink" href="#prompt-guardrails" title="Link to this heading">#</a></h1>
|
||||
<p>Arch is engineered with <a class="reference internal" href="../../../llms/llms.html#llms-in-arch"><span class="std std-ref">Arch-Guard</span></a>, an industry-leading safety layer, powered by a
|
||||
compact and high-performing LLM that monitors incoming prompts to detect and reject jailbreak attempts -
|
||||
ensuring that unauthorized or harmful behaviors are intercepted early in the process.</p>
|
||||
<p>To add jailbreak guardrails, see example below:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id1">
|
||||
<div class="code-block-caption"><span class="caption-text"><a class="reference download internal" download="" href="../../../_downloads/a0b0970c8fa22d4fe861e8362965b77c/getting-started.yml"><code class="xref download docutils literal notranslate"><span class="pre">arch-getting-started.yml</span></code></a></span><a class="headerlink" href="#id1" title="Link to this code">#</a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<span class="linenos"> 2</span><span class="nt">listener</span><span class="p">:</span>
|
||||
<span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
||||
<span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
||||
<span class="linenos"> 5</span><span class="w"> </span><span class="nt">messages</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">tuple | hugging-face-messages-api</span>
|
||||
<span class="linenos"> 6</span>
|
||||
<span class="linenos"> 7</span><span class="nt">system_prompts</span><span class="p">:</span>
|
||||
<span class="linenos"> 8</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">network_assistant</span>
|
||||
<span class="linenos"> 9</span><span class="w"> </span><span class="nt">content</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts about the operational health of the network</span>
|
||||
<span class="linenos">10</span>
|
||||
<span class="linenos">11</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
<span class="linenos">12</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
<span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPEN_AI_KEY</span>
|
||||
<span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
<span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
<span class="linenos">16</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral"</span>
|
||||
<span class="linenos">17</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_KEY</span>
|
||||
<span class="linenos">18</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mixtral8-7B</span>
|
||||
<span class="linenos">19</span>
|
||||
<span class="linenos">20</span><span class="nt">prompt_endpoints</span><span class="p">:</span>
|
||||
<span class="linenos">21</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.2"</span>
|
||||
<span class="linenos">22</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.1"</span>
|
||||
<span class="linenos">23</span>
|
||||
<span class="hll"><span class="linenos">24</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</span><span class="hll"><span class="linenos">25</span><span class="w"> </span><span class="nt">input-guard</span><span class="p">:</span>
|
||||
</span><span class="hll"><span class="linenos">26</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="c1">#jailbreak</span>
|
||||
</span><span class="hll"><span class="linenos">27</span><span class="w"> </span><span class="nt">on-exception-message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you are curious about my abilities. But I can only</span>
|
||||
</span><span class="linenos">28</span>
|
||||
<span class="linenos">29</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
<span class="linenos">30</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
|
||||
<span class="linenos">31</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">RAG</span>
|
||||
<span class="linenos">32</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">this prompt handles all information extractions scenarios</span>
|
||||
<span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
|
||||
<span class="linenos">34</span>
|
||||
<span class="linenos">35</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
|
||||
<span class="linenos">36</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
|
||||
<span class="linenos">37</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">used to help network operators with perform device operations like rebooting a device.</span>
|
||||
<span class="linenos">38</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
<span class="linenos">39</span><span class="nt">error_target</span><span class="p">:</span><span class="w"> </span><span class="c1">#handle errors from Bolt or upstream LLMs</span>
|
||||
<span class="linenos">40</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">“error_handler”</span>
|
||||
<span class="linenos">41</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/errors</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>As a roadmap item, Arch will expose the ability for developers to define custom guardrails via Arch-Guard-v2,
|
||||
and add support for additional safety checks defined by developers and hazardous categories like violent crimes, privacy, hate,
|
||||
etc. To offer feedback on our roadmap, please visit our <a class="reference external" href="https://github.com/orgs/katanemo/projects/1">GitHub page</a>.</p>
|
||||
</div>
|
||||
</section>
|
||||
<section id="prompt-targets">
|
||||
<h1>Prompt Targets<a class="headerlink" href="#prompt-targets" title="Link to this heading">#</a></h1>
|
||||
<p>Once a prompt passes any configured guardrail checks, Arch processes the contents of the incoming conversation
|
||||
and identifies where to forward the conversation to via its essential <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> primitive. Prompt targets
|
||||
are endpoints that receive prompts that are processed by Arch. For example, Arch enriches incoming prompts with
|
||||
metadata like knowing when a user’s intent has changed so that you can build faster, more accurate RAG apps.</p>
|
||||
<p>Configuring <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> is simple. See example below:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text"><a class="reference download internal" download="" href="../../../_downloads/a0b0970c8fa22d4fe861e8362965b77c/getting-started.yml"><code class="xref download docutils literal notranslate"><span class="pre">arch-getting-started.yml</span></code></a></span><a class="headerlink" href="#id2" title="Link to this code">#</a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<span class="linenos"> 2</span><span class="nt">listener</span><span class="p">:</span>
|
||||
<span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
||||
<span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
||||
<span class="linenos"> 5</span><span class="w"> </span><span class="nt">messages</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">tuple | hugging-face-messages-api</span>
|
||||
<span class="linenos"> 6</span>
|
||||
<span class="linenos"> 7</span><span class="nt">system_prompts</span><span class="p">:</span>
|
||||
<span class="linenos"> 8</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">network_assistant</span>
|
||||
<span class="linenos"> 9</span><span class="w"> </span><span class="nt">content</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts about the operational health of the network</span>
|
||||
<span class="linenos">10</span>
|
||||
<span class="linenos">11</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
<span class="linenos">12</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
<span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPEN_AI_KEY</span>
|
||||
<span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
<span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
<span class="linenos">16</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral"</span>
|
||||
<span class="linenos">17</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_KEY</span>
|
||||
<span class="linenos">18</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mixtral8-7B</span>
|
||||
<span class="linenos">19</span>
|
||||
<span class="linenos">20</span><span class="nt">prompt_endpoints</span><span class="p">:</span>
|
||||
<span class="linenos">21</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.2"</span>
|
||||
<span class="linenos">22</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.1"</span>
|
||||
<span class="linenos">23</span>
|
||||
<span class="linenos">24</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
<span class="linenos">25</span><span class="w"> </span><span class="nt">input-guard</span><span class="p">:</span>
|
||||
<span class="linenos">26</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="c1">#jailbreak</span>
|
||||
<span class="linenos">27</span><span class="w"> </span><span class="nt">on-exception-message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you are curious about my abilities. But I can only</span>
|
||||
<span class="linenos">28</span>
|
||||
<span class="hll"><span class="linenos">29</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span class="hll"><span class="linenos">30</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
|
||||
</span><span class="hll"><span class="linenos">31</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">RAG</span>
|
||||
</span><span class="hll"><span class="linenos">32</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">this prompt handles all information extraction scenarios</span>
|
||||
</span><span class="hll"><span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
|
||||
</span><span class="hll"><span class="linenos">34</span>
|
||||
</span><span class="hll"><span class="linenos">35</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
|
||||
</span><span class="hll"><span class="linenos">36</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
|
||||
</span><span class="hll"><span class="linenos">37</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">used to help network operators perform device operations like rebooting a device.</span>
|
||||
</span><span class="hll"><span class="linenos">38</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span class="linenos">39</span><span class="nt">error_target</span><span class="p">:</span><span class="w"> </span><span class="c1">#handle errors from Bolt or upstream LLMs</span>
|
||||
<span class="linenos">40</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"error_handler"</span>
|
||||
<span class="linenos">41</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/errors</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<section id="intent-detection-and-prompt-matching">
|
||||
<h2>Intent Detection and Prompt Matching:<a class="headerlink" href="#intent-detection-and-prompt-matching" title="Link to this heading">#</a></h2>
|
||||
<p>Arch uses fast Natural Language Inference (NLI) and embedding approaches to first detect the intent of each
|
||||
incoming prompt. This intent detection phase analyzes the prompt’s content and matches it against predefined
|
||||
prompt targets, ensuring that each prompt is forwarded to the most appropriate endpoint. Arch’s intent
|
||||
detection framework considers both the name and description of each prompt target, and uses a composite matching
|
||||
score between an NLI and cosine similarity to enhance accuracy in forwarding decisions.</p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>Embeddings</strong>: By embedding the prompt and comparing it to known target vectors, Arch effectively identifies
|
||||
the closest match, ensuring that the prompt is handled by the correct downstream service.</p></li>
|
||||
<li><p><strong>NLI</strong>: NLI techniques further refine the matching process by evaluating the semantic alignment between the
|
||||
prompt and potential targets.</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="agentic-apps-via-prompt-targets">
|
||||
<h2>Agentic Apps via Prompt Targets<a class="headerlink" href="#agentic-apps-via-prompt-targets" title="Link to this heading">#</a></h2>
|
||||
<p>To support agentic apps via prompts, such as scheduling travel plans or sharing comments on a document, Arch uses
|
||||
its function calling abilities to extract critical information from the incoming prompt (or a set of prompts)
|
||||
needed by a downstream backend API or function call before calling it directly. For more details on how you can
|
||||
build agentic applications using Arch, see our full guide <a class="reference internal" href="../../../getting_started/use_cases/function_calling.html#arch-function-calling-agentic-guide"><span class="std std-ref">here</span></a>.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p><a class="reference internal" href="../../../llms/llms.html#llms-in-arch"><span class="std std-ref">Arch-FC</span></a> is the dedicated agentic model engineered in Arch to extract information from
|
||||
a prompt (or a set of prompts) and execute the necessary backend API calls. This allows for efficient handling of agentic tasks,
|
||||
such as scheduling data retrieval, by dynamically interacting with backend services. Arch-FC is a flagship 1.3
|
||||
billion parameter model that matches performance with frontier models like Claude Sonnet 3.5 and GPT-4, while
|
||||
being 100x cheaper ($0.05 per million tokens, hosted) and 10x faster (p50 latencies of 200ms).</p>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="prompting-llms">
|
||||
<h1>Prompting LLMs<a class="headerlink" href="#prompting-llms" title="Link to this heading">#</a></h1>
|
||||
<p>Arch is a single piece of software that is designed to manage both ingress and egress prompt traffic, drawing its
|
||||
distributed proxy nature from the robust <a class="reference external" href="https://envoyproxy.io">Envoy</a>. This makes it extremely efficient and capable
|
||||
of handling upstream connections to LLMs. If your application is originating calls to an API-based LLM, simply use
|
||||
Arch’s Python or JavaScript client SDK to send traffic to the desired LLM of choice. By sending traffic through Arch,
|
||||
you can propagate traces, manage and monitor traffic, apply rate limits, and utilize a large set of traffic management
|
||||
capabilities in a central place.</p>
|
||||
<div class="admonition attention">
|
||||
<p class="admonition-title">Attention</p>
|
||||
<p>When you start Arch, it automatically creates a listener port for egress calls to upstream LLMs. This is based on the
|
||||
<code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> configuration section in the <code class="docutils literal notranslate"><span class="pre">prompt_config.yml</span></code> file. Arch binds itself to a local address such as
|
||||
127.0.0.1:9000/v1 or a DNS-based address like arch.local:9000/v1 for outgoing traffic.</p>
|
||||
</div>
|
||||
<section id="example-using-the-arch-python-sdk">
|
||||
<h2>Example: Using the Arch Python SDK<a class="headerlink" href="#example-using-the-arch-python-sdk" title="Link to this heading">#</a></h2>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">arch_client</span> <span class="kn">import</span> <span class="n">ArchClient</span>
|
||||
|
||||
<span class="c1"># Initialize the Arch client</span>
|
||||
<span class="n">client</span> <span class="o">=</span> <span class="n">ArchClient</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:9000/v1"</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Define your LLM provider and prompt</span>
|
||||
<span class="n">llm_provider</span> <span class="o">=</span> <span class="s2">"openai"</span>
|
||||
<span class="n">prompt</span> <span class="o">=</span> <span class="s2">"What is the capital of France?"</span>
|
||||
|
||||
<span class="c1"># Send the prompt to the LLM through Arch</span>
|
||||
<span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">llm_provider</span><span class="o">=</span><span class="n">llm_provider</span><span class="p">,</span> <span class="n">prompt</span><span class="o">=</span><span class="n">prompt</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Print the response</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"LLM Response:"</span><span class="p">,</span> <span class="n">response</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="example-using-openai-client-with-arch-as-an-egress-gateway">
|
||||
<h2>Example: Using OpenAI Client with Arch as an Egress Gateway<a class="headerlink" href="#example-using-openai-client-with-arch-as-an-egress-gateway" title="Link to this heading">#</a></h2>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">openai</span>
|
||||
|
||||
<span class="c1"># Set the OpenAI API base URL to the Arch gateway endpoint</span>
|
||||
<span class="n">openai</span><span class="o">.</span><span class="n">api_base</span> <span class="o">=</span> <span class="s2">"http://127.0.0.1:9000/v1"</span>
|
||||
|
||||
<span class="c1"># No need to set openai.api_key since it's configured in Arch's gateway</span>
|
||||
|
||||
<span class="c1"># Use the OpenAI client as usual</span>
|
||||
<span class="n">response</span> <span class="o">=</span> <span class="n">openai</span><span class="o">.</span><span class="n">Completion</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
||||
<span class="n">model</span><span class="o">=</span><span class="s2">"text-davinci-003"</span><span class="p">,</span>
|
||||
<span class="n">prompt</span><span class="o">=</span><span class="s2">"What is the capital of France?"</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"OpenAI Response:"</span><span class="p">,</span> <span class="n">response</span><span class="o">.</span><span class="n">choices</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>In these examples:</p>
|
||||
<blockquote>
|
||||
<div><p>The ArchClient is used to send traffic directly through the Arch egress proxy to the LLM of your choice, such as OpenAI.
|
||||
The OpenAI client is configured to route traffic via Arch by setting its API base URL to http://127.0.0.1:9000/v1, assuming Arch is
|
||||
running locally and bound to that address and port.</p>
|
||||
</div></blockquote>
|
||||
<p>This setup allows you to take advantage of Arch’s advanced traffic management features while interacting with LLM APIs like OpenAI.</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="../listeners/listeners.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">Listener</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="../listeners/llm_provider.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">LLM Provider</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||||
|
||||
|
||||
<div class="sidebar-secondary-item">
|
||||
<div class="page-toc tocsection onthispage">
|
||||
<i class="fa-solid fa-list"></i> Contents
|
||||
</div>
|
||||
<nav class="bd-toc-nav page-toc">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Prompts</a></li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#messages">Messages</a></li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompt-guardrails">Prompt Guardrails</a></li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompt-targets">Prompt Targets</a><ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#intent-detection-and-prompt-matching">Intent Detection and Prompt Matching:</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#agentic-apps-via-prompt-targets">Agentic Apps via Prompt Targets</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompting-llms">Prompting LLMs</a><ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-the-arch-python-sdk">Example: Using the Arch Python SDK</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-openai-client-with-arch-as-an-egress-gateway">Example: Using OpenAI Client with Arch as an Egress Gateway</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</nav></div>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
464
intro/getting_help.html
Normal file
464
intro/getting_help.html
Normal file
|
|
@ -0,0 +1,464 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>Getting help — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/getting_help';</script>
|
||||
<link rel="icon" href="../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="Getting Started" href="../getting_started/getting_started.html" />
|
||||
<link rel="prev" title="Life of a Request" href="life_of_a_request.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 has-children"><a class="reference internal" href="architecture/architecture.html">Technical Architecture</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/listeners/llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2 current active"><a class="current reference internal" href="#">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../_sources/intro/getting_help.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<button onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<button onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
<button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-list"></span>
|
||||
</button>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>Getting help</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
<div>
|
||||
<h2> Contents </h2>
|
||||
</div>
|
||||
<nav aria-label="Page">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reporting-security-vulnerabilities">Reporting security vulnerabilities</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="getting-help">
|
||||
<span id="id1"></span><h1>Getting help<a class="headerlink" href="#getting-help" title="Link to this heading">#</a></h1>
|
||||
<p>We are very interested in building a community around Arch. Please reach out to us if you are
|
||||
interested in using it and need help or want to contribute.</p>
|
||||
<p>Please see <a class="reference external" href="https://github.com/katanemo/arch#contact">contact info</a>.</p>
|
||||
<section id="reporting-security-vulnerabilities">
|
||||
<h2>Reporting security vulnerabilities<a class="headerlink" href="#reporting-security-vulnerabilities" title="Link to this heading">#</a></h2>
|
||||
<p>Please see <a class="reference external" href="https://github.com/katanemo/arch#reporting-security-vulnerabilities">security contact info</a>.</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="life_of_a_request.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">Life of a Request</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="../getting_started/getting_started.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">Getting Started</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||||
|
||||
|
||||
<div class="sidebar-secondary-item">
|
||||
<div class="page-toc tocsection onthispage">
|
||||
<i class="fa-solid fa-list"></i> Contents
|
||||
</div>
|
||||
<nav class="bd-toc-nav page-toc">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reporting-security-vulnerabilities">Reporting security vulnerabilities</a></li>
|
||||
</ul>
|
||||
</nav></div>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
465
intro/intro.html
Normal file
465
intro/intro.html
Normal file
|
|
@ -0,0 +1,465 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<!-- Viewport declaration for responsive layout; user zoom is intentionally left enabled. --><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>Introduction — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/intro';</script>
|
||||
<link rel="icon" href="../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="What is Arch" href="what_is_arch.html" />
|
||||
<link rel="prev" title="Documentation" href="../root.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="current reference internal" href="#">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 has-children"><a class="reference internal" href="architecture/architecture.html">Technical Architecture</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/listeners/llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<div class="header-article-item"><!-- Primary sidebar toggle: icon-only control, so it carries an explicit type and aria-label; the icon itself is decorative. --><button type="button" class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" aria-label="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars" aria-hidden="true"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../_sources/intro/intro.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<!-- Print-to-PDF action: explicit type="button" so it can never act as a submit control; the visible ".pdf" text is its name and the icon is decorative. --><button type="button" onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf" aria-hidden="true"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- Fullscreen toggle: icon-only control, so it carries an explicit type and aria-label; the icon itself is decorative. --><button type="button" onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode" aria-label="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand" aria-hidden="true"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>Introduction</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="introduction">
|
||||
<span id="intro"></span><h1>Introduction<a class="headerlink" href="#introduction" title="Link to this heading">#</a></h1>
|
||||
<div class="toctree-wrapper compound">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="architecture/architecture.html">Technical Architecture</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html#messages">Messages</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html#prompt-guardrails">Prompt Guardrails</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html#prompt-targets">Prompt Targets</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html#prompting-llms">Prompting LLMs</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/listeners/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/listeners/llm_provider.html#example-using-the-arch-python-sdk">Example: Using the Arch Python SDK</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="architecture/model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="life_of_a_request.html">Life of a Request</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html#terminology">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html#network-topology">Network topology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html#high-level-architecture">High level architecture</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html#configuration">Configuration</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html#request-flow-ingress">Request Flow (Ingress)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html#request-flow-egress">Request Flow (Egress)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html#id2">Overview</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="getting_help.html">Getting help</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="getting_help.html#reporting-security-vulnerabilities">Reporting security vulnerabilities</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="../root.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">Documentation</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="what_is_arch.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">What is Arch</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc.
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
668
intro/life_of_a_request.html
Normal file
668
intro/life_of_a_request.html
Normal file
|
|
@ -0,0 +1,668 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<!-- Viewport declaration for responsive layout; user zoom is intentionally left enabled. --><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>Life of a Request — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/life_of_a_request';</script>
|
||||
<link rel="icon" href="../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="Getting help" href="getting_help.html" />
|
||||
<link rel="prev" title="Model Serving" href="architecture/model_serving/model_serving.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="what_is_arch.html">What is Arch</a></li>
|
||||
<li class="toctree-l2 has-children"><a class="reference internal" href="architecture/architecture.html">Technical Architecture</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/listeners/llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2 current active"><a class="current reference internal" href="#">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<div class="header-article-item"><!-- Primary sidebar toggle: icon-only control, so it carries an explicit type and aria-label; the icon itself is decorative. --><button type="button" class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" aria-label="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars" aria-hidden="true"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../_sources/intro/life_of_a_request.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<!-- Print-to-PDF action: explicit type="button" so it can never act as a submit control; the visible ".pdf" text is its name and the icon is decorative. --><button type="button" onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf" aria-hidden="true"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- Fullscreen toggle: icon-only control, so it carries an explicit type and aria-label; the icon itself is decorative. --><button type="button" onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode" aria-label="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand" aria-hidden="true"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
<button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-list"></span>
|
||||
</button>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>Life of a Request</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
<div>
|
||||
<h2> Contents </h2>
|
||||
</div>
|
||||
<nav aria-label="Page">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#terminology">Terminology</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#network-topology">Network topology</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#high-level-architecture">High level architecture</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#configuration">Configuration</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#request-flow-ingress">Request Flow (Ingress)</a><ul class="nav section-nav flex-column">
|
||||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#request-flow-egress">Request Flow (Egress)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">Overview</a><ul class="nav section-nav flex-column">
|
||||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#post-request-processing">Post-request processing</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="life-of-a-request">
|
||||
<span id="id1"></span><h1>Life of a Request<a class="headerlink" href="#life-of-a-request" title="Link to this heading">#</a></h1>
|
||||
<p>Below we describe the events in the life of a request passing through an Arch gateway instance. We first
|
||||
describe how Arch fits into the request path and then the internal events that take place following
|
||||
the arrival of a request at Arch from downstream clients. We follow the request until the corresponding
|
||||
dispatch upstream and the response path.</p>
|
||||
<a class="reference internal image-reference" href="../_images/network-topology-ingress-egress.jpg"><img alt="../_images/network-topology-ingress-egress.jpg" class="align-center" src="../_images/network-topology-ingress-egress.jpg" style="width: 100%;" />
|
||||
</a>
|
||||
<section id="terminology">
|
||||
<h2>Terminology<a class="headerlink" href="#terminology" title="Link to this heading">#</a></h2>
|
||||
<p>We recommend that you get familiar with some of the <a class="reference internal" href="architecture/intro/terminology.html#arch-terminology"><span class="std std-ref">terminology</span></a> used in Arch
|
||||
before reading this section.</p>
|
||||
</section>
|
||||
<section id="network-topology">
|
||||
<h2>Network topology<a class="headerlink" href="#network-topology" title="Link to this heading">#</a></h2>
|
||||
<p>How a request flows through the components in a network (including Arch) depends on the network’s topology.
|
||||
Arch can be used in a wide variety of networking topologies. We focus on the inner operation of Arch below,
|
||||
but briefly we address how Arch relates to the rest of the network in this section.</p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>Downstream(Ingress)</strong> listeners take requests from downstream clients like a web UI or clients that forward
|
||||
prompts to your local application. Responses from the application flow back through Arch to the downstream.</p></li>
|
||||
<li><p><strong>Upstream(Egress)</strong> listeners take requests from the application and forward them to LLMs.</p></li>
|
||||
</ul>
|
||||
<a class="reference internal image-reference" href="../_images/network-topology-ingress-egress.jpg"><img alt="../_images/network-topology-ingress-egress.jpg" class="align-center" src="../_images/network-topology-ingress-egress.jpg" style="width: 100%;" />
|
||||
</a>
|
||||
<p>In practice, Arch can be deployed on the edge and as an internal load balancer between AI agents. A request path may
|
||||
traverse multiple Arch gateways:</p>
|
||||
<a class="reference internal image-reference" href="../_images/network-topology-agent.jpg"><img alt="../_images/network-topology-agent.jpg" class="align-center" src="../_images/network-topology-agent.jpg" style="width: 100%;" />
|
||||
</a>
|
||||
</section>
|
||||
<section id="high-level-architecture">
|
||||
<h2>High level architecture<a class="headerlink" href="#high-level-architecture" title="Link to this heading">#</a></h2>
|
||||
<p>Arch is a set of <strong>two</strong> self-contained processes that are designed to run alongside your application servers
|
||||
(or on a separate server connected to your application servers via a network). The first process is designated
|
||||
to manage HTTP-level networking and connection management concerns (protocol management, request id generation,
|
||||
header sanitization, etc.), and the other process is for <strong>model serving</strong>, which helps Arch make intelligent
|
||||
decisions about the incoming prompts. The model server hosts the purpose-built <a class="reference internal" href="../llms/llms.html#llms-in-arch"><span class="std std-ref">LLMs</span></a> to
|
||||
manage several critical, but undifferentiated, prompt related tasks on behalf of developers.</p>
|
||||
<p>The request processing path in Arch has three main parts:</p>
|
||||
<ul class="simple">
|
||||
<li><p><a class="reference internal" href="architecture/listeners/listeners.html#arch-overview-listeners"><span class="std std-ref">Listener subsystem</span></a> which handles <strong>downstream</strong> and <strong>upstream</strong> request
|
||||
processing. It is responsible for managing the downstream (ingress) and the upstream (egress) request
|
||||
lifecycle. The downstream and upstream HTTP/2 codec lives here.</p></li>
|
||||
<li><p><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html#arch-overview-prompt-handling"><span class="std std-ref">Prompt handler subsystem</span></a> which is responsible for selecting and
|
||||
forwarding prompts to <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> and establishes the lifecycle of any <strong>upstream</strong> connection to a
|
||||
hosted endpoint that implements domain-specific business logic for incoming prompts. This is where knowledge
|
||||
of targets and endpoint health, load balancing and connection pooling exists.</p></li>
|
||||
<li><p><a class="reference internal" href="architecture/model_serving/model_serving.html#arch-model-serving"><span class="std std-ref">Model serving subsystem</span></a> which helps Arch make intelligent decisions about the
|
||||
incoming prompts. The model server is designed to call the purpose-built <a class="reference internal" href="../llms/llms.html#llms-in-arch"><span class="std std-ref">LLMs</span></a> in Arch.</p></li>
|
||||
</ul>
|
||||
<p>The three subsystems are bridged with the HTTP router filter and the cluster manager subsystems of Envoy.</p>
|
||||
<p>Also, Arch utilizes <a class="reference external" href="https://blog.envoyproxy.io/envoy-threading-model-a8d44b922310">Envoy event-based thread model</a>.
|
||||
A main thread is responsible for the server lifecycle, configuration processing, stats, etc. and some number of
|
||||
<a class="reference internal" href="architecture/intro/threading_model.html#arch-overview-threading"><span class="std std-ref">worker threads</span></a> process requests. All threads operate around an event loop (<a class="reference external" href="https://libevent.org/">libevent</a>)
|
||||
and any given downstream TCP connection will be handled by exactly one worker thread for its lifetime. Each worker
|
||||
thread maintains its own pool of TCP connections to upstream endpoints.</p>
|
||||
<p>Worker threads rarely share state and operate in a trivially parallel fashion. This threading model
|
||||
enables scaling to very high core count CPUs.</p>
|
||||
</section>
|
||||
<section id="configuration">
|
||||
<h2>Configuration<a class="headerlink" href="#configuration" title="Link to this heading">#</a></h2>
|
||||
<p>Today, Arch only supports a static bootstrap configuration file for simplicity:</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<span class="nt">listener</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
||||
<span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
||||
<span class="w"> </span><span class="nt">messages</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">tuple | hugging-face-messages-api</span>
|
||||
|
||||
<span class="nt">system_prompts</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">network_assistant</span>
|
||||
<span class="w"> </span><span class="nt">content</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts about the operational health of the network</span>
|
||||
|
||||
<span class="nt">llm_providers</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
<span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPEN_AI_KEY</span>
|
||||
<span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
<span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral"</span>
|
||||
<span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_KEY</span>
|
||||
<span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mixtral8-7B</span>
|
||||
|
||||
<span class="nt">prompt_endpoints</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.2"</span>
|
||||
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.1"</span>
|
||||
|
||||
<span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="nt">input-guard</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="c1">#jailbreak</span>
|
||||
<span class="w"> </span><span class="nt">on-exception-message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you are curious about my abilities. But I can only</span>
|
||||
|
||||
<span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
|
||||
<span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">RAG</span>
|
||||
<span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">this prompt handles all information extractions scenarios</span>
|
||||
<span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
|
||||
|
||||
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
|
||||
<span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
|
||||
<span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">used to help network operators with perform device operations like rebooting a device.</span>
|
||||
<span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
<span class="nt">error_target</span><span class="p">:</span><span class="w"> </span><span class="c1">#handle errors from Bolt or upstream LLMs</span>
|
||||
<span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_handler</span>
|
||||
<span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/errors</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="request-flow-ingress">
|
||||
<h2>Request Flow (Ingress)<a class="headerlink" href="#request-flow-ingress" title="Link to this heading">#</a></h2>
|
||||
<section id="overview">
|
||||
<h3>Overview<a class="headerlink" href="#overview" title="Link to this heading">#</a></h3>
|
||||
<p>A brief outline of the life cycle of a request and response using the example configuration above:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p><strong>TCP Connection Establishment</strong>:
|
||||
A TCP connection from downstream is accepted by an Arch listener running on a worker thread.
|
||||
The listener filter chain provides SNI and other pre-TLS information. The transport socket, typically TLS,
|
||||
decrypts incoming data for processing.</p></li>
|
||||
<li><p><strong>Prompt Guardrails Check</strong>:
|
||||
Arch first checks the incoming prompts for guardrails such as jailbreak attempts. This ensures
|
||||
that harmful or unwanted behaviors are detected early in the request processing pipeline.</p></li>
|
||||
<li><p><strong>Intent Matching</strong>:
|
||||
The decrypted data stream is deframed by the HTTP/2 codec in Arch’s HTTP connection manager. Arch performs
|
||||
intent matching via its <strong>prompt-handler</strong> subsystem using the name and description of the defined prompt targets,
|
||||
determining which endpoint should handle the prompt.</p></li>
|
||||
<li><p><strong>Parameter Gathering with Arch-FC</strong>:
|
||||
If a prompt target requires specific parameters, Arch engages Arch-FC to extract the necessary details
|
||||
from the incoming prompt(s). This process gathers the critical information needed for downstream API calls.</p></li>
|
||||
<li><p><strong>API Call Execution</strong>:
|
||||
Arch routes the prompt to the appropriate backend API or function call. If an endpoint cluster is identified,
|
||||
load balancing is performed, circuit breakers are checked, and the request is proxied to the upstream endpoint.</p></li>
|
||||
<li><p><strong>Default Summarization by Upstream LLM</strong>:
|
||||
By default, if no specific endpoint processing is needed, the prompt is sent to an upstream LLM for summarization.
|
||||
This ensures that responses are concise and relevant, enhancing user experience in RAG (Retrieval-Augmented Generation)
|
||||
and agentic applications.</p></li>
|
||||
<li><p><strong>Error Handling and Forwarding</strong>:
|
||||
Errors encountered during processing, such as failed function calls or guardrail detections, are forwarded to
|
||||
designated error targets. Error details are communicated through specific headers to the application:</p>
|
||||
<ul class="simple">
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">X-Function-Error-Code</span></code>: Code indicating the type of function call error.</p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">X-Prompt-Guard-Error-Code</span></code>: Code specifying violations detected by prompt guardrails.</p></li>
|
||||
<li><p>Additional headers carry messages and timestamps to aid in debugging and logging.</p></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><p><strong>Response Handling</strong>:
|
||||
The upstream endpoint’s TLS transport socket encrypts the response, which is then proxied back downstream.
|
||||
Responses pass through HTTP filters in reverse order, ensuring any necessary processing or modification before final delivery.</p></li>
|
||||
</ol>
|
||||
</section>
|
||||
</section>
|
||||
<section id="request-flow-egress">
|
||||
<h2>Request Flow (Egress)<a class="headerlink" href="#request-flow-egress" title="Link to this heading">#</a></h2>
|
||||
</section>
|
||||
<section id="id2">
|
||||
<h2>Overview<a class="headerlink" href="#id2" title="Link to this heading">#</a></h2>
|
||||
<p>A brief outline of the life cycle of a request and response in the context of egress traffic from an application
|
||||
to Large Language Models (LLMs) via Arch:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p><strong>HTTP Connection Establishment to LLM</strong>:
|
||||
Arch initiates an HTTP connection to the upstream LLM service. This connection is handled by Arch’s egress listener
|
||||
running on a worker thread. The connection typically uses a secure transport protocol such as HTTPS, ensuring the
|
||||
prompt data is encrypted before being sent to the LLM service.</p></li>
|
||||
<li><p><strong>Rate Limiting</strong>:
|
||||
Before sending the request to the LLM, Arch applies rate-limiting policies to ensure that the upstream LLM service
|
||||
is not overwhelmed by excessive traffic. Rate limits are enforced per client or service, ensuring fair usage and
|
||||
preventing accidental or malicious overload. If the rate limit is exceeded, Arch may return an appropriate HTTP
|
||||
error (e.g., 429 Too Many Requests) without sending the prompt to the LLM.</p></li>
|
||||
<li><p><strong>Load Balancing to (hosted) LLM Endpoints</strong>:
|
||||
After passing the rate-limiting checks, Arch routes the prompt to the appropriate LLM endpoint.
|
||||
If multiple LLM provider instances are available, load balancing is performed to distribute traffic evenly
|
||||
across the instances. Arch checks the health of the LLM endpoints using circuit breakers and health checks,
|
||||
ensuring that the prompt is only routed to a healthy, responsive instance.</p></li>
|
||||
<li><p><strong>Response Reception and Forwarding</strong>:
|
||||
Once the LLM processes the prompt, Arch receives the response from the LLM service. The response is typically a
|
||||
generated text, completion, or summarization. Upon reception, Arch decrypts (if necessary) and handles the response,
|
||||
passing it through any egress processing pipeline defined by the application, such as logging or additional response filtering.</p></li>
|
||||
</ol>
|
||||
<section id="post-request-processing">
|
||||
<h3>Post-request processing<a class="headerlink" href="#post-request-processing" title="Link to this heading">#</a></h3>
|
||||
<p>Once a request completes, the stream is destroyed. The following also takes place:</p>
|
||||
<ul class="simple">
|
||||
<li><p>The post-request <a class="reference internal" href="../observability/stats.html#monitoring"><span class="std std-ref">monitoring</span></a> statistics are updated (e.g. timing, active requests, upgrades, health checks).
|
||||
Some statistics are updated earlier, however, during request processing. Stats are batched and written by the main
|
||||
thread periodically.</p></li>
|
||||
<li><p><a class="reference internal" href="../observability/access_logs.html#arch-access-logging"><span class="std std-ref">Access logs</span></a> are written to the access log.</p></li>
|
||||
<li><p><a class="reference internal" href="../observability/tracing.html#arch-overview-tracing"><span class="std std-ref">Trace</span></a> spans are finalized. If our example request was traced, a
|
||||
trace span, describing the duration and details of the request would be created by the HCM when
|
||||
processing request headers and then finalized by the HCM during post-request processing.</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="architecture/model_serving/model_serving.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">Model Serving</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="getting_help.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">Getting help</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||||
|
||||
|
||||
<div class="sidebar-secondary-item">
|
||||
<div class="page-toc tocsection onthispage">
|
||||
<i class="fa-solid fa-list"></i> Contents
|
||||
</div>
|
||||
<nav class="bd-toc-nav page-toc">
|
||||
<ul class="visible nav section-nav flex-column">
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#terminology">Terminology</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#network-topology">Network topology</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#high-level-architecture">High level architecture</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#configuration">Configuration</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#request-flow-ingress">Request Flow (Ingress)</a><ul class="nav section-nav flex-column">
|
||||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#request-flow-egress">Request Flow (Egress)</a></li>
|
||||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">Overview</a><ul class="nav section-nav flex-column">
|
||||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#post-request-processing">Post-request processing</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</nav></div>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
505
intro/what_is_arch.html
Normal file
505
intro/what_is_arch.html
Normal file
|
|
@ -0,0 +1,505 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en" data-content_root="../" >
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<title>What is Arch — Arch 0.1-beta documentation</title>
|
||||
|
||||
|
||||
|
||||
<script data-cfasync="false">
|
||||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||||
</script>
|
||||
|
||||
<!-- Loaded before other Sphinx assets -->
|
||||
<link href="../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link href="../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
|
||||
|
||||
<link href="../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=a746c00c" />
|
||||
<link rel="stylesheet" type="text/css" href="../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
||||
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
|
||||
|
||||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||||
<script src="../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<script src="../_static/documentation_options.js?v=2742c0eb"></script>
|
||||
<script src="../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>
|
||||
<script src="../_static/copybutton.js?v=f281be69"></script>
|
||||
<script src="../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||||
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/what_is_arch';</script>
|
||||
<link rel="icon" href="../_static/favicon.ico"/>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="Technical Architecture" href="architecture/architecture.html" />
|
||||
<link rel="prev" title="Introduction" href="intro.html" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
</head>
|
||||
|
||||
|
||||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||
|
||||
|
||||
|
||||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||||
|
||||
<div id="pst-scroll-pixel-helper"></div>
|
||||
|
||||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||||
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-primary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||||
|
||||
<input type="checkbox"
|
||||
class="sidebar-toggle"
|
||||
id="pst-secondary-sidebar-checkbox"/>
|
||||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||||
|
||||
<div class="search-button__wrapper">
|
||||
<div class="search-button__overlay"></div>
|
||||
<div class="search-button__search-container">
|
||||
<form class="bd-search d-flex align-items-center"
|
||||
action="../search.html"
|
||||
method="get">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<input type="search"
|
||||
class="form-control"
|
||||
name="q"
|
||||
id="search-input"
|
||||
placeholder="Search..."
|
||||
aria-label="Search..."
|
||||
autocomplete="off"
|
||||
autocorrect="off"
|
||||
autocapitalize="off"
|
||||
spellcheck="false"/>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||
</form></div>
|
||||
</div>
|
||||
|
||||
<div class="pst-async-banner-revealer d-none">
|
||||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||||
</div>
|
||||
|
||||
|
||||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||||
</header>
|
||||
|
||||
|
||||
<div class="bd-container">
|
||||
<div class="bd-container__inner bd-page-width">
|
||||
|
||||
|
||||
|
||||
<div class="bd-sidebar-primary bd-sidebar">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar-header-items sidebar-primary__section">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a class="navbar-brand logo" href="../root.html">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<img src="../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
||||
<script>document.write(`<img src="../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
||||
|
||||
|
||||
</a></div>
|
||||
<div class="sidebar-primary-item">
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass"></i>
|
||||
<span class="search-button__default-text">Search</span>
|
||||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||
</button>
|
||||
`);
|
||||
</script></div>
|
||||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||||
<div class="bd-toc-item navbar-nav active">
|
||||
<ul class="current nav bd-sidenav">
|
||||
<li class="toctree-l1 current active has-children"><a class="reference internal" href="intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
||||
<li class="toctree-l2 current active"><a class="current reference internal" href="#">What is Arch</a></li>
|
||||
<li class="toctree-l2 has-children"><a class="reference internal" href="architecture/architecture.html">Technical Architecture</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/intro/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/intro/threading_model.html">Threading model</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/listeners/listeners.html">Listener</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/prompt_processing/prompt_processing.html">Prompts</a></li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/listeners/llm_provider.html">LLM Provider</a></li>
|
||||
|
||||
<li class="toctree-l3"><a class="reference internal" href="architecture/model_serving/model_serving.html">Model Serving</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="life_of_a_request.html">Life of a Request</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="getting_help.html">Getting help</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/getting_started.html">Getting Started</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
||||
<li class="toctree-l1 has-children"><a class="reference internal" href="../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/stats.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../observability/access_logs.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</details></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llms/llms.html">LLMs</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||
</div>
|
||||
|
||||
<div id="rtd-footer-container"></div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<main id="main-content" class="bd-main" role="main">
|
||||
|
||||
|
||||
|
||||
<div class="sbt-scroll-pixel-helper"></div>
|
||||
|
||||
<div class="bd-content">
|
||||
<div class="bd-article-container">
|
||||
|
||||
<div class="bd-header-article d-print-none">
|
||||
<div class="header-article-items header-article__inner">
|
||||
|
||||
<div class="header-article-items__start">
|
||||
|
||||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<span class="fa-solid fa-bars"></span>
|
||||
</button></div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div class="header-article-items__end">
|
||||
|
||||
<div class="header-article-item">
|
||||
|
||||
<div class="article-header-buttons">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="dropdown dropdown-download-buttons">
|
||||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||||
<i class="fas fa-download"></i>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
|
||||
|
||||
|
||||
<li><a href="../_sources/intro/what_is_arch.rst" target="_blank"
|
||||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||||
title="Download source file"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.rst</span>
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
<li>
|
||||
<button onclick="window.print()"
|
||||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||||
title="Print to PDF"
|
||||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-file-pdf"></i>
|
||||
</span>
|
||||
<span class="btn__text-container">.pdf</span>
|
||||
</button>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<button onclick="toggleFullScreen()"
|
||||
class="btn btn-sm btn-fullscreen-button"
|
||||
title="Fullscreen mode"
|
||||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||||
>
|
||||
|
||||
|
||||
<span class="btn__icon-container">
|
||||
<i class="fas fa-expand"></i>
|
||||
</span>
|
||||
|
||||
</button>
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
document.write(`
|
||||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||||
</button>
|
||||
`);
|
||||
</script>
|
||||
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="jb-print-docs-body" class="onlyprint">
|
||||
<h1>What is Arch</h1>
|
||||
<!-- Table of contents -->
|
||||
<div id="print-main-content">
|
||||
<div id="jb-print-toc">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div id="searchbox"></div>
|
||||
<article class="bd-article">
|
||||
|
||||
<section id="what-is-arch">
|
||||
<h1>What is Arch<a class="headerlink" href="#what-is-arch" title="Link to this heading">#</a></h1>
|
||||
<p>Arch is an intelligent <a class="reference external" href="https://www.cloudflare.com/learning/ddos/what-is-layer-7/">(Layer 7)</a> gateway
|
||||
designed for generative AI apps, AI agents, and Co-pilots that work with prompts. Engineered with purpose-built
|
||||
<a class="reference internal" href="../llms/llms.html#llms-in-arch"><span class="std std-ref">LLMs</span></a>, Arch handles all the critical but undifferentiated tasks related to the handling and
|
||||
processing of prompts, including detecting and rejecting <a class="reference external" href="https://github.com/verazuo/jailbreak_llms">jailbreak</a>
|
||||
attempts, intelligently calling “backend” APIs to fulfill the user’s request represented in a prompt, routing to
|
||||
and offering disaster recovery between upstream LLMs, and managing the observability of prompts and LLM interactions
|
||||
in a centralized way.</p>
|
||||
<a class="reference internal image-reference" href="../_images/arch-logo.png"><img alt="../_images/arch-logo.png" class="align-center" src="../_images/arch-logo.png" style="width: 100%;" />
|
||||
</a>
|
||||
<p><strong>The project was born out of the belief that:</strong></p>
|
||||
<blockquote>
|
||||
<div><p><em>Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests
|
||||
including secure handling, intelligent routing, robust observability, and integration with backend (API)
|
||||
systems for personalization - all outside business logic.</em></p>
|
||||
</div></blockquote>
|
||||
<p>In practice, achieving the above goal is incredibly difficult. Arch attempts to do so by providing the
|
||||
following high level features:</p>
|
||||
<hr class="docutils" />
|
||||
<p><strong>Out-of-process architecture, built on</strong> <a class="reference external" href="http://envoyproxy.io/">Envoy</a>: Arch takes a dependency on
|
||||
Envoy and is a self-contained process that is designed to run alongside your application servers. Arch uses
|
||||
Envoy’s HTTP connection management subsystem, HTTP L7 filtering and telemetry capabilities to extend the
|
||||
functionality exclusively for prompts and LLMs. This gives Arch several advantages:</p>
|
||||
<ul class="simple">
|
||||
<li><p>Arch builds on Envoy’s proven success. Envoy is used at massive scale by the leading technology companies of
|
||||
our time including <a class="reference external" href="https://www.airbnb.com">AirBnB</a>, <a class="reference external" href="https://www.dropbox.com">Dropbox</a>,
|
||||
<a class="reference external" href="https://www.google.com">Google</a>, <a class="reference external" href="https://www.reddit.com">Reddit</a>, <a class="reference external" href="https://www.stripe.com">Stripe</a>,
|
||||
etc. It’s battle-tested, scales linearly with usage, and enables developers to focus on what really matters:
|
||||
application features and business logic.</p></li>
|
||||
<li><p>Arch works with any application language. A single Arch deployment can act as a gateway for AI applications
|
||||
written in Python, Java, C++, Go, PHP, etc.</p></li>
|
||||
<li><p>Arch can be deployed and upgraded quickly across your infrastructure transparently without the horrid pain
|
||||
of deploying library upgrades in your applications.</p></li>
|
||||
</ul>
|
||||
<p><strong>Engineered with Fast LLMs:</strong> Arch is engineered with specialized (sub-billion) LLMs that are designed for
|
||||
fast, cost-effective and accurate handling of prompts. These <a class="reference internal" href="../llms/llms.html#llms-in-arch"><span class="std std-ref">LLMs</span></a> are designed to be
|
||||
best-in-class for critical prompt-related tasks like:</p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>Function/API Calling:</strong> Arch helps you easily personalize your applications by enabling calls to
|
||||
application-specific (API) operations via user prompts. This involves any predefined functions or APIs
|
||||
you want to expose to users to perform tasks, gather information, or manipulate data. With function calling,
|
||||
you have flexibility to support “agentic” experiences tailored to specific use cases - from updating insurance
|
||||
claims to creating ad campaigns - via prompts. Arch analyzes prompts, extracts critical information from
|
||||
prompts, engages in lightweight conversation to gather any missing parameters and makes API calls so that you can
|
||||
focus on writing business logic. For more details, read <a class="reference internal" href="architecture/prompt_processing/prompt_processing.html#arch-overview-prompt-handling"><span class="std std-ref">prompt processing</span></a>.</p></li>
|
||||
<li><p><strong>Prompt Guardrails:</strong> Arch helps you improve the safety of your application by applying prompt guardrails in
|
||||
a centralized way for better governance hygiene. With prompt guardrails you can prevent <a class="reference external" href="https://github.com/verazuo/jailbreak_llms">jailbreak</a>
|
||||
attempts or toxicity present in users’ prompts without having to write a single line of code. To learn more
|
||||
about how to configure guardrails available in Arch, read <a class="reference internal" href="architecture/prompt_processing/prompt_processing.html#arch-overview-prompt-handling"><span class="std std-ref">prompt processing</span></a>.</p></li>
|
||||
<li><p><strong>Intent-Drift Detection:</strong> Developers struggle to handle <a class="reference external" href="https://www.reddit.com/r/ChatGPTPromptGenius/comments/17dzmpy/how_to_use_rag_with_conversation_history_for/?">follow-up</a>,
|
||||
or <a class="reference external" href="https://www.reddit.com/r/LocalLLaMA/comments/18mqwg6/best_practice_for_rag_with_followup_chat/">clarifying</a>
|
||||
questions. Specifically, when users ask for modifications or additions to previous responses their AI applications
|
||||
often generate entirely new responses instead of adjusting the previous ones. Arch offers intent-drift detection as a
|
||||
feature so that developers know when the user has shifted away from the previous intent so that they can improve
|
||||
their retrieval, lower overall token cost and dramatically improve the speed and accuracy of their responses back
|
||||
to users.</p></li>
|
||||
</ul>
|
||||
<p><strong>Traffic Management:</strong> Arch offers several capabilities for LLM calls originating from your applications, including a
|
||||
vendor-agnostic SDK to make LLM calls, smart retries on errors from upstream LLMs, and automatic cutover to other LLMs
|
||||
configured in Arch for continuous availability and disaster recovery scenarios. Arch extends Envoy’s <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/cluster_manager">cluster subsystem</a> to manage upstream connections
|
||||
to LLMs so that you can build resilient AI applications.</p>
|
||||
<p><strong>Front/edge Gateway:</strong> There is substantial benefit in using the same software at the edge (observability,
|
||||
traffic shaping algorithms, applying guardrails, etc.) as for outbound LLM inference use cases. Arch has the feature set
|
||||
that makes it exceptionally well suited as an edge gateway for AI applications. This includes TLS termination, rate limiting,
|
||||
and prompt-based routing.</p>
|
||||
<p><strong>Best-in-Class Monitoring:</strong> Arch offers several monitoring metrics that help you understand three
|
||||
critical aspects of your application: latency, token usage, and error rates by an upstream LLM provider. Latency
|
||||
measures the speed at which your application is responding to users, which includes metrics like time to first
|
||||
token (TFT), time per output token (TOT), and the total latency as perceived by users.</p>
|
||||
<p><strong>End-to-End Tracing:</strong> Arch propagates trace context using the W3C Trace Context standard, specifically through
|
||||
the <code class="docutils literal notranslate"><span class="pre">traceparent</span></code> header. This allows each component in the system to record its part of the request flow,
|
||||
enabling <strong>end-to-end tracing</strong> across the entire application. By using OpenTelemetry, Arch ensures that
|
||||
developers can capture this trace data consistently and in a format compatible with various observability tools.
|
||||
For more details, read <a class="reference internal" href="../observability/tracing.html#arch-overview-tracing"><span class="std std-ref">tracing</span></a>.</p>
|
||||
</section>
|
||||
|
||||
|
||||
</article>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<footer class="prev-next-footer d-print-none">
|
||||
|
||||
<div class="prev-next-area">
|
||||
<a class="left-prev"
|
||||
href="intro.html"
|
||||
title="previous page">
|
||||
<i class="fa-solid fa-angle-left"></i>
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">previous</p>
|
||||
<p class="prev-next-title">Introduction</p>
|
||||
</div>
|
||||
</a>
|
||||
<a class="right-next"
|
||||
href="architecture/architecture.html"
|
||||
title="next page">
|
||||
<div class="prev-next-info">
|
||||
<p class="prev-next-subtitle">next</p>
|
||||
<p class="prev-next-title">Technical Architecture</p>
|
||||
</div>
|
||||
<i class="fa-solid fa-angle-right"></i>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
<footer class="bd-footer-content">
|
||||
|
||||
<div class="bd-footer-content__inner container">
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
<p class="component-author">
|
||||
By Katanemo Labs, Inc
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
|
||||
<p class="copyright">
|
||||
|
||||
© Copyright 2024, Katanemo Labs, Inc.
|
||||
<br/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
<div class="footer-item">
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
||||
</main>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||
<script src="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||||
|
||||
<footer class="bd-footer">
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Add table
Add a link
Reference in a new issue