mirror of
https://github.com/katanemo/plano.git
synced 2026-05-04 13:23:00 +02:00
deploy: ba7279becb
This commit is contained in:
parent
65a77aff31
commit
95b657c966
37 changed files with 783 additions and 486 deletions
|
|
@ -7,9 +7,9 @@
|
|||
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
|
||||
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport"/>
|
||||
<title>Terminology | Arch Docs v0.1.5</title>
|
||||
<meta content="Terminology | Arch Docs v0.1.5" property="og:title"/>
|
||||
<meta content="Terminology | Arch Docs v0.1.5" name="twitter:title"/>
|
||||
<title>Terminology | Arch Docs v0.1.7</title>
|
||||
<meta content="Terminology | Arch Docs v0.1.7" property="og:title"/>
|
||||
<meta content="Terminology | Arch Docs v0.1.7" name="twitter:title"/>
|
||||
<link href="../../_static/pygments.css?v=75ebff74" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/theme.css?v=42baaae4" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||||
|
|
@ -39,7 +39,7 @@
|
|||
</a><header class="sticky top-0 z-40 w-full border-b shadow-sm border-border supports-backdrop-blur:bg-background/60 bg-background/95 backdrop-blur"><div class="container flex items-center h-14">
|
||||
<div class="hidden mr-4 md:flex">
|
||||
<a class="flex items-center mr-6" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.1.5</span>
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.1.7</span>
|
||||
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
|
||||
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
|
||||
|
|
@ -75,7 +75,7 @@
|
|||
</header>
|
||||
<div class="flex-1"><div class="container flex-1 items-start md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full shrink-0 overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
|
||||
<a class="!justify-start text-sm md:!hidden bg-background" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.1.5</span>
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.1.7</span>
|
||||
</a>
|
||||
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0 h-[calc(100vh-8rem)] md:h-auto">
|
||||
<div class="overflow-y-auto h-full w-full relative pr-6">
|
||||
|
|
@ -124,8 +124,9 @@
|
|||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Build with Arch</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/agent.html">Agentic Workflow</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/rag.html">RAG Application</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/agent.html">Agentic Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/rag.html">RAG Apps</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/multi_turn.html">Multi-Turn</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
|
|
@ -144,7 +145,7 @@
|
|||
<div class="w-full min-w-0 mx-auto">
|
||||
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
|
||||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../../index.html">
|
||||
<span class="hidden md:inline">Arch Docs v0.1.5</span>
|
||||
<span class="hidden md:inline">Arch Docs v0.1.7</span>
|
||||
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
|
||||
</svg>
|
||||
|
|
@ -155,38 +156,41 @@
|
|||
<div id="content" role="main">
|
||||
<section id="terminology">
|
||||
<span id="arch-terminology"></span><h1>Terminology<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#terminology"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>A few definitions before we dive into the main architecture documentation. Arch borrows from Envoy’s terminology
|
||||
to keep things consistent in logs, traces and in code.</p>
|
||||
<p>A few definitions before we dive into the main architecture documentation. Also note, Arch borrows from Envoy’s terminology
|
||||
to keep things consistent in logs and traces, and introduces and clarifies concepts as they relate to LLM applications.</p>
|
||||
<p><strong>Agent</strong>: An application that uses LLMs to handle wide-ranging tasks from users via prompts. This could be as simple
|
||||
as retrieving or summarizing data from an API, or being able to trigger complex actions like adjusting ad campaigns, or
|
||||
changing travel plans via prompts.</p>
|
||||
<p><strong>Arch Config</strong>: Arch operates based on a configuration that controls the behavior of a single instance of the Arch gateway.
|
||||
This is where you enable capabilities like LLM routing, fast function calling (via prompt_targets), applying guardrails, and enabling critical
|
||||
features like metrics and tracing. For the full configuration reference of <cite>arch_config.yaml</cite> see <a class="reference internal" href="../../resources/configuration_reference.html#configuration-refernce"><span class="std std-ref">here</span></a>.</p>
|
||||
<p><strong>Downstream(Ingress)</strong>: A downstream client (web application, etc.) connects to Arch, sends prompts, and receives responses.</p>
|
||||
<p><strong>Upstream(Egress)</strong>: An upstream host that receives connections and prompts from Arch, and returns context or responses for a prompt.</p>
|
||||
<a class="reference internal image-reference" href="../../_images/network-topology-ingress-egress.jpg"><img alt="../../_images/network-topology-ingress-egress.jpg" class="align-center" src="../../_images/network-topology-ingress-egress.jpg" style="width: 100%;"/>
|
||||
</a>
|
||||
<p><strong>Listener</strong>: A <a class="reference internal" href="listener.html#arch-overview-listeners"><span class="std std-ref">listener</span></a> is a named network location (e.g., port, address, path etc.) that Arch listens on to process prompts
|
||||
before forwarding them to your application server endpoints. Arch enables you to configure one listener for downstream connections
|
||||
(like port 80, 443) and creates a separate internal listener for calls that initiate from your application code to LLMs.</p>
|
||||
<p><strong>Listener</strong>: A <a class="reference internal" href="listener.html#arch-overview-listeners"><span class="std std-ref">listener</span></a> is a named network location (e.g., port, address, path etc.) that Arch
|
||||
listens on to process prompts before forwarding them to your application server endpoints. Arch enables you to configure one listener
|
||||
for downstream connections (like port 80, 443) and creates a separate internal listener for calls that initiate from your application
|
||||
code to LLMs.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>When you start Arch, you specify a listener address/port that you want to bind downstream. But, Arch uses a predefined port
|
||||
that you can use (<code class="docutils literal notranslate"><span class="pre">127.0.0.1:12000</span></code>) to proxy egress calls originating from your application to LLMs (API-based or hosted).
|
||||
For more details, check out <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">LLM provider</span></a>.</p>
|
||||
</div>
|
||||
<p><strong>Instance</strong>: An instance of the Arch gateway. When you start Arch it creates at most two processes. One to handle Layer 7
|
||||
networking operations (auth, tls, observability, etc) and the second process to serve models that enable it to make smart
|
||||
decisions on how to accept, handle and forward prompts. The second process is optional, as the model serving service could be
|
||||
hosted on a different network (an API call). But these two processes are considered a single instance of Arch.</p>
|
||||
<p><strong>Prompt Target</strong>: Arch offers a primitive called <a class="reference internal" href="../prompt_target.html#prompt-target"><span class="std std-ref">prompt target</span></a> to help separate business logic from undifferentiated
|
||||
work in building generative AI apps. Prompt targets are endpoints that receive prompts that are processed by Arch.
|
||||
For example, Arch enriches incoming prompts with metadata like knowing when a request is a follow-up or clarifying prompt
|
||||
so that you can build faster, more accurate retrieval (RAG) apps. To support agentic apps, like scheduling travel plans or
|
||||
sharing comments on a document - via prompts, Arch uses its function calling abilities to extract critical information from
|
||||
the incoming prompt (or a set of prompts) needed by a downstream backend API or function call before calling it directly.</p>
|
||||
<p><strong>Prompt Target</strong>: Arch offers a primitive called <a class="reference internal" href="../prompt_target.html#prompt-target"><span class="std std-ref">prompt target</span></a> to help separate business logic from
|
||||
undifferentiated work in building generative AI apps. Prompt targets are endpoints that receive prompts that are processed by Arch.
|
||||
For example, Arch enriches incoming prompts with metadata like knowing when a request is a follow-up or clarifying prompt so that you
|
||||
can build faster, more accurate retrieval (RAG) apps. To support agentic apps, like scheduling travel plans or sharing comments on a
|
||||
document - via prompts, Arch uses its function calling abilities to extract critical information from the incoming prompt (or a set of
|
||||
prompts) needed by a downstream backend API or function call before calling it directly.</p>
|
||||
<p><strong>Model Serving</strong>: Arch is a set of <cite>two</cite> self-contained processes that are designed to run alongside your application servers
|
||||
(or on a separate host connected via a network). The <a class="reference internal" href="model_serving.html#model-serving"><span class="std std-ref">model serving</span></a> process helps Arch make intelligent decisions
|
||||
about the incoming prompts. The model server is designed to call the (fast) purpose-built LLMs in Arch.</p>
|
||||
<p><strong>Error Target</strong>: <a class="reference internal" href="error_target.html#error-target"><span class="std std-ref">Error targets</span></a> are those endpoints that receive forwarded errors from Arch when issues arise,
|
||||
such as failing to properly call a function/API, detecting violations of guardrails, or encountering other processing errors.
|
||||
These errors are communicated to the application via headers <code class="docutils literal notranslate"><span class="pre">X-Arch-[ERROR-TYPE]</span></code>, allowing it to handle the errors gracefully
|
||||
and take appropriate actions.</p>
|
||||
<p><strong>Model Serving</strong>: Arch is a set of <cite>two</cite> self-contained processes that are designed to run alongside your application servers
|
||||
(or on a separate host connected via a network). The <a class="reference internal" href="model_serving.html#model-serving"><span class="std std-ref">model serving</span></a> process helps Arch make intelligent decisions about the
|
||||
incoming prompts. The model server is designed to call the (fast) purpose-built LLMs in Arch.</p>
|
||||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
|
|
@ -211,12 +215,12 @@ incoming prompts. The model server is designed to call the (fast) purpose-built
|
|||
</div><footer class="py-6 border-t border-border md:py-0">
|
||||
<div class="container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row">
|
||||
<div class="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
|
||||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2024, Katanemo Labs, Inc Last updated: Dec 12, 2024. </p>
|
||||
<p class="text-sm leading-loose text-center text-muted-foreground md:text-left">© 2024, Katanemo Labs, Inc Last updated: Dec 20, 2024. </p>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
<script src="../../_static/documentation_options.js?v=10d53aa0"></script>
|
||||
<script src="../../_static/documentation_options.js?v=d94a9a28"></script>
|
||||
<script src="../../_static/doctools.js?v=9bcbadda"></script>
|
||||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script defer="defer" src="../../_static/theme.js?v=073f68d9"></script>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue