This commit is contained in:
salmanap 2024-10-08 20:19:06 +00:00
parent f4b686c7fc
commit 3e881c6eec
28 changed files with 819 additions and 820 deletions

View file

@ -1,4 +1,4 @@
version: "0.1-beta"
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
@ -8,9 +8,9 @@ listener:
common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
tls_certificates:
- certificate_chain:
filename: "/etc/certs/cert.pem"
filename: /etc/certs/cert.pem
private_key:
filename: "/etc/certs/key.pem"
filename: /etc/certs/key.pem
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
@ -18,42 +18,42 @@ endpoints:
# value could be ip address or a hostname with port
# this could also be a list of endpoints for load balancing
# for example endpoint: [ ip1:port, ip2:port ]
endpoint: "127.0.0.1:80"
endpoint: 127.0.0.1:80
# max time to wait for a connection to be established
connect_timeout: 0.005s
mistral_local:
endpoint: "127.0.0.1:8001"
endpoint: 127.0.0.1:8001
error_target:
endpoint: "error_target_1"
endpoint: error_target_1
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: "OpenAI"
provider: "openai"
access_key: $OPENAI_API_KEY
- name: OpenAI
provider: openai
access_key: OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
rate_limits:
selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
http_header:
name: "Authorization"
name: Authorization
value: "" # Empty value means each separate value has a separate limit
limit:
tokens: 100000 # Tokens per unit
unit: "minute"
unit: minute
- name: "Mistral8x7b"
provider: "mistral"
access_key: $MISTRAL_API_KEY
model: "mistral-8x7b"
- name: Mistral8x7b
provider: mistral
access_key: MISTRAL_API_KEY
model: mistral-8x7b
- name: "MistralLocal7b"
provider: "local"
model: "mistral-7b-instruct"
endpoint: "mistral_local"
- name: MistralLocal7b
provider: local
model: mistral-7b-instruct
endpoint: mistral_local
# provides a way to override default settings for the arch system
overrides:
@ -62,44 +62,41 @@ overrides:
prompt_target_intent_matching_threshold: 0.60
# default system prompt used by all prompt targets
system_prompt: |
You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
prompt_guards:
input_guards:
jailbreak:
on_exception:
message: "Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters."
message: Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.
prompt_targets:
- name: "reboot_network_device"
description: "Helps network operators perform device operations like rebooting a device."
endpoint:
name: app_server
path: "/agent/action"
parameters:
- name: "device_id"
# additional type options include: int | float | bool | string | list | dict
type: "string"
description: "Identifier of the network device to reboot."
required: true
- name: "confirmation"
type: "string"
description: "Confirmation flag to proceed with reboot."
default: "no"
enum: [yes, no]
- name: "information_extraction"
- name: information_extraction
default: true
description: "This prompt handles all scenarios that are question and answer in nature. Like summarization, information extraction, etc."
description: handle all scenarios that are question and answer in nature. Like summarization, information extraction, etc.
endpoint:
name: app_server
path: "/agent/summary"
path: /agent/summary
# Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
auto_llm_dispatch_on_response: true
# override system prompt for this prompt target
system_prompt: |
You are a helpful information extraction assistant. Use the information that is provided to you.
system_prompt: You are a helpful information extraction assistant. Use the information that is provided to you.
- name: reboot_network_device
description: Reboot a specific network device
endpoint:
name: app_server
path: /agent/action
parameters:
- name: device_id
type: str
description: Identifier of the network device to reboot.
required: true
- name: confirmation
type: bool
description: Confirmation flag to proceed with reboot.
default: false
enum: [true, false]
error_target:
endpoint:

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -160,7 +160,7 @@ you have the flexibility to support “agentic” apps tailored to specific use
claims to creating ad campaigns - via prompts.</p>
<p>Arch analyzes prompts, extracts critical information from prompts, engages in lightweight conversation with
the user to gather any missing parameters and makes API calls so that you can focus on writing business logic.
Arch does this via its purpose-built <a class="reference internal" href="../guides/function_calling.html#function-calling"><span class="std std-ref">Arch-FC LLM</span></a> - the fastest (200ms p90 - 10x faster than GPT-4o)
Arch does this via its purpose-built <a class="reference internal" href="../guides/function_calling.html#function-calling"><span class="std std-ref">Arch-Function</span></a> - the fastest (200ms p90 - 10x faster than GPT-4o)
and cheapest (100x cheaper than GPT-4o) function-calling LLM that matches performance with frontier models.</p>
<a class="reference internal image-reference" href="../_images/function-calling-flow.jpg"><img alt="../_images/function-calling-flow.jpg" class="align-center" src="../_images/function-calling-flow.jpg" style="width: 100%;"/>
</a>
@ -169,175 +169,171 @@ and cheapest (100x than GPT-40) function-calling LLM that matches performance wi
<p>In the most common scenario, users will request a single action via prompts, and Arch efficiently processes the
request by extracting relevant parameters, validating the input, and calling the designated function or API. Here
is how you would go about enabling this scenario with Arch:</p>
<section id="step-1-define-prompt-targets-with-functions">
<h3>Step 1: Define prompt targets with functions<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-prompt-targets-with-functions" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-prompt-targets-with-functions'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<div class="literal-block-wrapper docutils container" id="id1">
<div class="code-block-caption"><span class="caption-text">Define prompt targets that can enable users to engage with API and backend functions of an app</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
</span><span id="line-2"><span class="linenos"> 2</span><span class="nt">listen</span><span class="p">:</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
</span><span id="line-5"><span class="linenos"> 5</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
<section id="step-1-define-prompt-targets">
<h3>Step 1: Define Prompt Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-prompt-targets" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-prompt-targets'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<div class="literal-block-wrapper docutils container" id="id2">
<div class="code-block-caption"><span class="caption-text">Prompt Target Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
</span><span id="line-2"><span class="linenos"> 2</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listen</span><span class="p">:</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
</span><span id="line-5"><span class="linenos"> 5</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/plain Content-type in the http request</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
</span><span id="line-8"><span class="linenos"> 8</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-10"><span class="linenos">10</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-15"><span class="linenos">15</span>
</span><span id="line-16"><mark><span class="linenos">16</span><span class="nt">prompt_targets</span><span class="p">:</span>
</mark></span><span id="line-17"><mark><span class="linenos">17</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
</mark></span><span id="line-18"><mark><span class="linenos">18</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">&gt;</span>
</mark></span><span id="line-19"><mark><span class="linenos">19</span><span class="w"> </span><span class="no">This prompt target handles user requests to reboot devices.</span>
</mark></span><span id="line-20"><mark><span class="linenos">20</span><span class="w"> </span><span class="no">It ensures that when users request to reboot specific devices or device groups, the system processes the reboot commands accurately.</span>
</mark></span><span id="line-21"><mark><span class="linenos">21</span>
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="no">**Examples of user prompts:**</span>
</mark></span><span id="line-23"><mark><span class="linenos">23</span>
</mark></span><span id="line-24"><mark><span class="linenos">24</span><span class="w"> </span><span class="no">- "Please reboot device 12345."</span>
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="no">- "Restart all devices in tenant group tenant-XYZ."</span>
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="no">- "I need to reboot devices A, B, and C."</span>
</mark></span><span id="line-27"><mark><span class="linenos">27</span>
</mark></span><span id="line-28"><mark><span class="linenos">28</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
</mark></span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_ids"</span>
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"A</span><span class="nv"> </span><span class="s">list</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">identifiers</span><span class="nv"> </span><span class="s">(IDs)</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_group"</span>
</mark></span><span id="line-35"><mark><span class="linenos">35</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">string</span><span class="w"> </span><span class="c1"># Options: string | integer | float | list | dictionary | set</span>
</mark></span><span id="line-36"><mark><span class="linenos">36</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"The</span><span class="nv"> </span><span class="s">name</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">group</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</mark></span><span id="line-37"><mark><span class="linenos">37</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</mark></span><span id="line-38"><span class="linenos">38</span>
</span><span id="line-39"><span class="linenos">39</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-40"><span class="linenos">40</span><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-45"><span class="linenos">45</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
</span><span id="line-46"><span class="linenos">46</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-47"><span class="linenos">47</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-17"><span class="linenos">17</span>
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-20"><span class="linenos">20</span>
</span><span id="line-21"><mark><span class="linenos">21</span><span class="nt">prompt_targets</span><span class="p">:</span>
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
</mark></span><span id="line-23"><mark><span class="linenos">23</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot specific devices or device groups</span>
</mark></span><span id="line-24"><mark><span class="linenos">24</span>
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</mark></span><span id="line-27"><mark><span class="linenos">27</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_ids</span>
</mark></span><span id="line-28"><mark><span class="linenos">28</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span>
</mark></span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">A list of device identifiers (IDs) to reboot.</span>
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_group</span>
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">The name of the device group to reboot</span>
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</mark></span><span id="line-35"><span class="linenos">35</span>
</span><span id="line-36"><span class="linenos">36</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-37"><span class="linenos">37</span><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-38"><span class="linenos">38</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-39"><span class="linenos">39</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-40"><span class="linenos">40</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span></code></pre></div>
</div>
</div>
</section>
<section id="step-2-process-request-parameters-in-flask">
<h3>Step 2: Process request parameters in Flask<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-process-request-parameters-in-flask" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-process-request-parameters-in-flask'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<section id="step-2-process-request-parameters">
<h3>Step 2: Process Request Parameters<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-process-request-parameters" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-process-request-parameters'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>Once the prompt targets are configured as above, handling those parameters is done in your application code, as shown below:</p>
<div class="literal-block-wrapper docutils container" id="id2">
<div class="code-block-caption"><span class="caption-text">Parameter handling with Flask</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="literal-block-wrapper docutils container" id="id3">
<div class="code-block-caption"><span class="caption-text">Parameter handling with Flask</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="kn">from</span> <span class="nn">flask</span> <span class="kn">import</span> <span class="n">Flask</span><span class="p">,</span> <span class="n">request</span><span class="p">,</span> <span class="n">jsonify</span>
</span><span id="line-2"><span class="linenos"> 2</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="n">app</span> <span class="o">=</span> <span class="n">Flask</span><span class="p">(</span><span class="vm">__name__</span><span class="p">)</span>
</span><span id="line-4"><span class="linenos"> 4</span>
</span><span id="line-5"><span class="linenos"> 5</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s1">'/agent/device_summary'</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s1">'POST'</span><span class="p">])</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="k">def</span> <span class="nf">get_device_summary</span><span class="p">():</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="sd">"""</span>
</span><span id="line-8"><span class="linenos"> 8</span><span class="sd"> Endpoint to retrieve device statistics based on device IDs and an optional time range.</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="sd"> """</span>
</span><span id="line-10"><span class="linenos">10</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
</span><span id="line-11"><span class="linenos">11</span>
</span><span id="line-12"><span class="linenos">12</span> <span class="c1"># Validate 'device_ids' parameter</span>
</span><span id="line-13"><span class="linenos">13</span> <span class="n">device_ids</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'device_ids'</span><span class="p">)</span>
</span><span id="line-14"><span class="linenos">14</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">device_ids</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">device_ids</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
</span><span id="line-15"><span class="linenos">15</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s2">"'device_ids' parameter is required and must be a list"</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-16"><span class="linenos">16</span>
</span><span id="line-17"><span class="linenos">17</span> <span class="c1"># Validate 'time_range' parameter (optional, defaults to 7)</span>
</span><span id="line-18"><span class="linenos">18</span> <span class="n">time_range</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'time_range'</span><span class="p">,</span> <span class="mi">7</span><span class="p">)</span>
</span><span id="line-19"><span class="linenos">19</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">time_range</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
</span><span id="line-20"><span class="linenos">20</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s2">"'time_range' must be an integer"</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-21"><span class="linenos">21</span>
</span><span id="line-22"><span class="linenos">22</span> <span class="c1"># Simulate retrieving statistics for the given device IDs and time range</span>
</span><span id="line-23"><span class="linenos">23</span> <span class="c1"># In a real application, you would query your database or external service here</span>
</span><span id="line-24"><span class="linenos">24</span> <span class="n">statistics</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="line-25"><span class="linenos">25</span> <span class="k">for</span> <span class="n">device_id</span> <span class="ow">in</span> <span class="n">device_ids</span><span class="p">:</span>
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># Placeholder for actual data retrieval</span>
</span><span id="line-27"><span class="linenos">27</span> <span class="n">stats</span> <span class="o">=</span> <span class="p">{</span>
</span><span id="line-28"><span class="linenos">28</span> <span class="s1">'device_id'</span><span class="p">:</span> <span class="n">device_id</span><span class="p">,</span>
</span><span id="line-29"><span class="linenos">29</span> <span class="s1">'time_range'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'Last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s1"> days'</span><span class="p">,</span>
</span><span id="line-30"><span class="linenos">30</span> <span class="s1">'data'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'Statistics data for device </span><span class="si">{</span><span class="n">device_id</span><span class="si">}</span><span class="s1"> over the last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s1"> days.'</span>
</span><span id="line-31"><span class="linenos">31</span> <span class="p">}</span>
</span><span id="line-32"><span class="linenos">32</span> <span class="n">statistics</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stats</span><span class="p">)</span>
</span><span id="line-33"><span class="linenos">33</span>
</span><span id="line-34"><span class="linenos">34</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span>
</span><span id="line-35"><span class="linenos">35</span> <span class="s1">'statistics'</span><span class="p">:</span> <span class="n">statistics</span>
</span><span id="line-36"><span class="linenos">36</span> <span class="p">}</span>
</span><span id="line-37"><span class="linenos">37</span>
</span><span id="line-38"><span class="linenos">38</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
</span><span id="line-39"><span class="linenos">39</span>
</span><span id="line-40"><span class="linenos">40</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
</span><span id="line-41"><span class="linenos">41</span> <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</span><span id="line-5"><span class="linenos"> 5</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s2">"/agent/device_summary"</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s2">"POST"</span><span class="p">])</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="k">def</span> <span class="nf">get_device_summary</span><span class="p">():</span>
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="sd">"""</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="sd"> Endpoint to retrieve device statistics based on device IDs and an optional time range.</span>
</span><span id="line-10"><span class="linenos">10</span><span class="sd"> """</span>
</span><span id="line-11"><span class="linenos">11</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
</span><span id="line-12"><span class="linenos">12</span>
</span><span id="line-13"><span class="linenos">13</span> <span class="c1"># Validate 'device_ids' parameter</span>
</span><span id="line-14"><span class="linenos">14</span> <span class="n">device_ids</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"device_ids"</span><span class="p">)</span>
</span><span id="line-15"><span class="linenos">15</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">device_ids</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">device_ids</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
</span><span id="line-16"><span class="linenos">16</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span>
</span><span id="line-17"><span class="linenos">17</span> <span class="p">{</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"'device_ids' parameter is required and must be a list"</span><span class="p">}</span>
</span><span id="line-18"><span class="linenos">18</span> <span class="p">),</span> <span class="mi">400</span>
</span><span id="line-19"><span class="linenos">19</span>
</span><span id="line-20"><span class="linenos">20</span> <span class="c1"># Validate 'time_range' parameter (optional, defaults to 7)</span>
</span><span id="line-21"><span class="linenos">21</span> <span class="n">time_range</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"time_range"</span><span class="p">,</span> <span class="mi">7</span><span class="p">)</span>
</span><span id="line-22"><span class="linenos">22</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">time_range</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
</span><span id="line-23"><span class="linenos">23</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"'time_range' must be an integer"</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-24"><span class="linenos">24</span>
</span><span id="line-25"><span class="linenos">25</span> <span class="c1"># Simulate retrieving statistics for the given device IDs and time range</span>
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># In a real application, you would query your database or external service here</span>
</span><span id="line-27"><span class="linenos">27</span> <span class="n">statistics</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="line-28"><span class="linenos">28</span> <span class="k">for</span> <span class="n">device_id</span> <span class="ow">in</span> <span class="n">device_ids</span><span class="p">:</span>
</span><span id="line-29"><span class="linenos">29</span> <span class="c1"># Placeholder for actual data retrieval</span>
</span><span id="line-30"><span class="linenos">30</span> <span class="n">stats</span> <span class="o">=</span> <span class="p">{</span>
</span><span id="line-31"><span class="linenos">31</span> <span class="s2">"device_id"</span><span class="p">:</span> <span class="n">device_id</span><span class="p">,</span>
</span><span id="line-32"><span class="linenos">32</span> <span class="s2">"time_range"</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"Last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s2"> days"</span><span class="p">,</span>
</span><span id="line-33"><span class="linenos">33</span> <span class="s2">"data"</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"Statistics data for device </span><span class="si">{</span><span class="n">device_id</span><span class="si">}</span><span class="s2"> over the last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s2"> days."</span><span class="p">,</span>
</span><span id="line-34"><span class="linenos">34</span> <span class="p">}</span>
</span><span id="line-35"><span class="linenos">35</span> <span class="n">statistics</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stats</span><span class="p">)</span>
</span><span id="line-36"><span class="linenos">36</span>
</span><span id="line-37"><span class="linenos">37</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"statistics"</span><span class="p">:</span> <span class="n">statistics</span><span class="p">}</span>
</span><span id="line-38"><span class="linenos">38</span>
</span><span id="line-39"><span class="linenos">39</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
</span><span id="line-40"><span class="linenos">40</span>
</span><span id="line-41"><span class="linenos">41</span>
</span><span id="line-42"><span class="linenos">42</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
</span><span id="line-43"><span class="linenos">43</span> <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</span></code></pre></div>
</div>
</div>
</section>
</section>
<section id="parallel-multiple-function-calling">
<h2>Parallel/ Multiple Function Calling<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#parallel-multiple-function-calling" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#parallel-multiple-function-calling'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<h2>Parallel &amp; Multiple Function Calling<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#parallel-multiple-function-calling" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#parallel-multiple-function-calling'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<p>In more complex use cases, users may request multiple actions or need multiple APIs/functions to be called
simultaneously or sequentially. With Arch, you can handle these scenarios efficiently using parallel or multiple
function calling. This allows your application to engage in a broader range of interactions, such as updating
different datasets, triggering events across systems, or collecting results from multiple services in one prompt.</p>
<p>Arch-FC1B is built to manage these parallel tasks efficiently, ensuring low latency and high throughput, even
when multiple functions are invoked. It provides two mechanisms to handle these cases:</p>
<section id="step-1-define-multiple-function-targets">
<h3>Step 1: Define Multiple Function Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-multiple-function-targets" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-multiple-function-targets'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<section id="id1">
<h3>Step 1: Define Prompt Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#id1'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>When enabling multiple function calling, define the prompt targets in a way that supports multiple functions or
API calls based on the user's prompt. These targets can be triggered in parallel or sequentially, depending on
the user's intent.</p>
<p>Example of Multiple Prompt Targets in YAML:</p>
<div class="literal-block-wrapper docutils container" id="id3">
<div class="code-block-caption"><span class="caption-text">Define prompt targets that can enable users to engage with API and backend functions of an app</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
</span><span id="line-2"><span class="linenos"> 2</span><span class="nt">listen</span><span class="p">:</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
</span><span id="line-5"><span class="linenos"> 5</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
<div class="literal-block-wrapper docutils container" id="id4">
<div class="code-block-caption"><span class="caption-text">Prompt Target Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
</span><span id="line-2"><span class="linenos"> 2</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listen</span><span class="p">:</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
</span><span id="line-5"><span class="linenos"> 5</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/plain Content-type in the http request</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
</span><span id="line-8"><span class="linenos"> 8</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-10"><span class="linenos">10</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-15"><span class="linenos">15</span>
</span><span id="line-16"><mark><span class="linenos">16</span><span class="nt">prompt_targets</span><span class="p">:</span>
</mark></span><span id="line-17"><mark><span class="linenos">17</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
</mark></span><span id="line-18"><mark><span class="linenos">18</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">&gt;</span>
</mark></span><span id="line-19"><mark><span class="linenos">19</span><span class="w"> </span><span class="no">This prompt target handles user requests to reboot devices.</span>
</mark></span><span id="line-20"><mark><span class="linenos">20</span><span class="w"> </span><span class="no">It ensures that when users request to reboot specific devices or device groups, the system processes the reboot commands accurately.</span>
</mark></span><span id="line-21"><mark><span class="linenos">21</span>
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="no">**Examples of user prompts:**</span>
</mark></span><span id="line-23"><mark><span class="linenos">23</span>
</mark></span><span id="line-24"><mark><span class="linenos">24</span><span class="w"> </span><span class="no">- "Please reboot device 12345."</span>
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="no">- "Restart all devices in tenant group tenant-XYZ"</span>
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="no">- "I need to reboot devices A, B, and C."</span>
</mark></span><span id="line-27"><mark><span class="linenos">27</span>
</mark></span><span id="line-28"><mark><span class="linenos">28</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
</mark></span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_ids"</span>
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"A</span><span class="nv"> </span><span class="s">list</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">identifiers</span><span class="nv"> </span><span class="s">(IDs)</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_group"</span>
</mark></span><span id="line-35"><mark><span class="linenos">35</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">string</span><span class="w"> </span><span class="c1"># Options: string | integer | float | list | dictionary | set</span>
</mark></span><span id="line-36"><mark><span class="linenos">36</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"The</span><span class="nv"> </span><span class="s">name</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">group</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</mark></span><span id="line-37"><mark><span class="linenos">37</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</mark></span><span id="line-38"><span class="linenos">38</span>
</span><span id="line-39"><span class="linenos">39</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-40"><span class="linenos">40</span><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-45"><span class="linenos">45</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
</span><span id="line-46"><span class="linenos">46</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-47"><span class="linenos">47</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-17"><span class="linenos">17</span>
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-20"><span class="linenos">20</span>
</span><span id="line-21"><mark><span class="linenos">21</span><span class="nt">prompt_targets</span><span class="p">:</span>
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
</mark></span><span id="line-23"><mark><span class="linenos">23</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot specific devices or device groups</span>
</mark></span><span id="line-24"><mark><span class="linenos">24</span>
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</mark></span><span id="line-27"><mark><span class="linenos">27</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_ids</span>
</mark></span><span id="line-28"><mark><span class="linenos">28</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span>
</mark></span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">A list of device identifiers (IDs) to reboot.</span>
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_group</span>
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">The name of the device group to reboot</span>
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</mark></span><span id="line-35"><span class="linenos">35</span>
</span><span id="line-36"><span class="linenos">36</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-37"><span class="linenos">37</span><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-38"><span class="linenos">38</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-39"><span class="linenos">39</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-40"><span class="linenos">40</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span></code></pre></div>
</div>
</div>
@ -365,12 +361,12 @@ the users intent.</p>
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
<ul>
<li><a :data-current="activeSection === '#single-function-call'" class="reference internal" href="#single-function-call">Single Function Call</a><ul>
<li><a :data-current="activeSection === '#step-1-define-prompt-targets-with-functions'" class="reference internal" href="#step-1-define-prompt-targets-with-functions">Step 1: Define prompt targets with functions</a></li>
<li><a :data-current="activeSection === '#step-2-process-request-parameters-in-flask'" class="reference internal" href="#step-2-process-request-parameters-in-flask">Step 2: Process request parameters in Flask</a></li>
<li><a :data-current="activeSection === '#step-1-define-prompt-targets'" class="reference internal" href="#step-1-define-prompt-targets">Step 1: Define Prompt Targets</a></li>
<li><a :data-current="activeSection === '#step-2-process-request-parameters'" class="reference internal" href="#step-2-process-request-parameters">Step 2: Process Request Parameters</a></li>
</ul>
</li>
<li><a :data-current="activeSection === '#parallel-multiple-function-calling'" class="reference internal" href="#parallel-multiple-function-calling">Parallel/ Multiple Function Calling</a><ul>
<li><a :data-current="activeSection === '#step-1-define-multiple-function-targets'" class="reference internal" href="#step-1-define-multiple-function-targets">Step 1: Define Multiple Function Targets</a></li>
<li><a :data-current="activeSection === '#parallel-multiple-function-calling'" class="reference internal" href="#parallel-multiple-function-calling">Parallel &amp; Multiple Function Calling</a><ul>
<li><a :data-current="activeSection === '#id1'" class="reference internal" href="#id1">Step 1: Define Prompt Targets</a></li>
</ul>
</li>
</ul>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -157,49 +157,72 @@
Retrieval-Augmented Generation (RAG) applications.</p>
<section id="intent-drift-detection">
<h2>Intent-drift Detection<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#intent-drift-detection" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#intent-drift-detection'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<p>Developers struggle to handle <a class="reference external" href="https://www.reddit.com/r/ChatGPTPromptGenius/comments/17dzmpy/how_to_use_rag_with_conversation_history_for/?" rel="nofollow noopener">follow-up<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>
or <a class="reference external" href="https://www.reddit.com/r/LocalLLaMA/comments/18mqwg6/best_practice_for_rag_with_followup_chat/" rel="nofollow noopener">clarifying<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>
questions. Specifically, when users ask for changes or additions to previous responses their AI applications often
generate entirely new responses instead of adjusting previous ones. Arch offers <em>intent-drift</em> tracking as a feature so
that developers can know when the user has shifted away from a previous intent so that they can dramatically improve
retrieval accuracy, lower overall token cost and improve the speed of their responses back to users.</p>
<p>Developers struggle to handle <code class="docutils literal notranslate"><span class="pre">follow-up</span></code> or <code class="docutils literal notranslate"><span class="pre">clarification</span></code> questions.
Specifically, when users ask for changes or additions to previous responses their AI applications often generate entirely new responses instead of adjusting previous ones.
Arch offers <strong>intent-drift</strong> tracking as a feature so that developers can know when the user has shifted away from a previous intent so that they can dramatically improve retrieval accuracy, lower overall token cost and improve the speed of their responses back to users.</p>
<p>Arch uses its built-in lightweight NLI and embedding models to know if the user has steered away from an active intent.
Arch's intent-drift detection mechanism is based on its <em>prompt_targets</em> primitive. Arch tries to match an incoming
prompt to one of the <em>prompt_targets</em> configured in the gateway. Once it detects that the user has moved away from an active
Arch's intent-drift detection mechanism is based on its <a class="reference internal" href="../concepts/prompt_target.html#prompt-target"><span class="std std-ref">prompt_targets</span></a> primitive. Arch tries to match an incoming
prompt to one of the prompt_targets configured in the gateway. Once it detects that the user has moved away from an active
intent, Arch adds the <code class="docutils literal notranslate"><span class="pre">x-arch-intent-drift</span></code> headers to the request before sending it to your application servers.</p>
<div class="literal-block-wrapper docutils container" id="id1">
<div class="code-block-caption"><span class="caption-text">Intent Detection Example</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s1">'/process_rag'</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s1">'POST'</span><span class="p">])</span>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s2">"/process_rag"</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s2">"POST"</span><span class="p">])</span>
</span><span id="line-2"><span class="linenos"> 2</span><span class="k">def</span> <span class="nf">process_rag</span><span class="p">():</span>
</span><span id="line-3"><span class="linenos"> 3</span> <span class="c1"># Extract JSON data from the request</span>
</span><span id="line-4"><span class="linenos"> 4</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
</span><span id="line-5"><span class="linenos"> 5</span>
</span><span id="line-6"><span class="linenos"> 6</span> <span class="n">user_id</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'user_id'</span><span class="p">)</span>
</span><span id="line-6"><span class="linenos"> 6</span> <span class="n">user_id</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"user_id"</span><span class="p">)</span>
</span><span id="line-7"><span class="linenos"> 7</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">user_id</span><span class="p">:</span>
</span><span id="line-8"><span class="linenos"> 8</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s1">'User ID is required'</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-8"><span class="linenos"> 8</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"User ID is required"</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-9"><span class="linenos"> 9</span>
</span><span id="line-10"><span class="linenos">10</span> <span class="n">client_messages</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'messages'</span><span class="p">)</span>
</span><span id="line-10"><span class="linenos">10</span> <span class="n">client_messages</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"messages"</span><span class="p">)</span>
</span><span id="line-11"><span class="linenos">11</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">client_messages</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">client_messages</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
</span><span id="line-12"><span class="linenos">12</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s1">'Messages array is required'</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-12"><span class="linenos">12</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"Messages array is required"</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-13"><span class="linenos">13</span>
</span><span id="line-14"><mark><span class="linenos">14</span> <span class="c1"># Extract the intent change marker from Arch's headers if present for the current prompt</span>
</mark></span><span id="line-15"><mark><span class="linenos">15</span> <span class="n">intent_changed_header</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'x-arch-intent-marker'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
</mark></span><span id="line-16"><mark><span class="linenos">16</span> <span class="k">if</span> <span class="n">intent_changed_header</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">''</span><span class="p">,</span> <span class="s1">'false'</span><span class="p">]:</span>
</mark></span><span id="line-15"><mark><span class="linenos">15</span> <span class="n">intent_changed_header</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"x-arch-intent-marker"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
</mark></span><span id="line-16"><mark><span class="linenos">16</span> <span class="k">if</span> <span class="n">intent_changed_header</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">""</span><span class="p">,</span> <span class="s2">"false"</span><span class="p">]:</span>
</mark></span><span id="line-17"><mark><span class="linenos">17</span> <span class="n">intent_changed</span> <span class="o">=</span> <span class="kc">False</span>
</mark></span><span id="line-18"><mark><span class="linenos">18</span> <span class="k">elif</span> <span class="n">intent_changed_header</span> <span class="o">==</span> <span class="s1">'true'</span><span class="p">:</span>
</mark></span><span id="line-18"><mark><span class="linenos">18</span> <span class="k">elif</span> <span class="n">intent_changed_header</span> <span class="o">==</span> <span class="s2">"true"</span><span class="p">:</span>
</mark></span><span id="line-19"><mark><span class="linenos">19</span> <span class="n">intent_changed</span> <span class="o">=</span> <span class="kc">True</span>
</mark></span><span id="line-20"><mark><span class="linenos">20</span> <span class="k">else</span><span class="p">:</span>
</mark></span><span id="line-21"><mark><span class="linenos">21</span> <span class="c1"># Invalid value provided</span>
</mark></span><span id="line-22"><mark><span class="linenos">22</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s1">'Invalid value for x-arch-prompt-intent-change header'</span><span class="p">}),</span> <span class="mi">400</span>
</mark></span><span id="line-23"><span class="linenos">23</span>
</span><span id="line-24"><span class="linenos">24</span> <span class="c1"># Update user conversation based on intent change</span>
</span><span id="line-25"><span class="linenos">25</span> <span class="n">memory</span> <span class="o">=</span> <span class="n">update_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">,</span> <span class="n">client_messages</span><span class="p">,</span> <span class="n">intent_changed</span><span class="p">)</span>
</span><span id="line-26"><span class="linenos">26</span>
</span><span id="line-27"><span class="linenos">27</span> <span class="c1"># Retrieve messages since last intent change for LLM</span>
</span><span id="line-28"><span class="linenos">28</span> <span class="n">messages_for_llm</span> <span class="o">=</span> <span class="n">get_messages_since_last_intent</span><span class="p">(</span><span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">messages</span><span class="p">)</span>
</span><span id="line-29"><span class="linenos">29</span>
</span><span id="line-30"><span class="linenos">30</span> <span class="c1"># Forward messages to upstream LLM</span>
</span><span id="line-31"><span class="linenos">31</span> <span class="n">llm_response</span> <span class="o">=</span> <span class="n">forward_to_llm</span><span class="p">(</span><span class="n">messages_for_llm</span><span class="p">)</span>
</mark></span><span id="line-22"><mark><span class="linenos">22</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span>
</mark></span><span id="line-23"><mark><span class="linenos">23</span> <span class="p">{</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"Invalid value for x-arch-prompt-intent-change header"</span><span class="p">}</span>
</mark></span><span id="line-24"><mark><span class="linenos">24</span> <span class="p">),</span> <span class="mi">400</span>
</mark></span><span id="line-25"><span class="linenos">25</span>
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># Update user conversation based on intent change</span>
</span><span id="line-27"><span class="linenos">27</span> <span class="n">memory</span> <span class="o">=</span> <span class="n">update_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">,</span> <span class="n">client_messages</span><span class="p">,</span> <span class="n">intent_changed</span><span class="p">)</span>
</span><span id="line-28"><span class="linenos">28</span>
</span><span id="line-29"><span class="linenos">29</span> <span class="c1"># Retrieve messages since last intent change for LLM</span>
</span><span id="line-30"><span class="linenos">30</span> <span class="n">messages_for_llm</span> <span class="o">=</span> <span class="n">get_messages_since_last_intent</span><span class="p">(</span><span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">messages</span><span class="p">)</span>
</span><span id="line-31"><span class="linenos">31</span>
</span><span id="line-32"><span class="linenos">32</span> <span class="c1"># Forward messages to upstream LLM</span>
</span><span id="line-33"><span class="linenos">33</span> <span class="n">llm_response</span> <span class="o">=</span> <span class="n">forward_to_llm</span><span class="p">(</span><span class="n">messages_for_llm</span><span class="p">)</span>
</span><span id="line-34"><span class="linenos">34</span>
</span><span id="line-35"><span class="linenos">35</span> <span class="c1"># Prepare the messages to return</span>
</span><span id="line-36"><span class="linenos">36</span> <span class="n">messages_to_return</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="line-37"><span class="linenos">37</span> <span class="k">for</span> <span class="n">message</span> <span class="ow">in</span> <span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">messages</span><span class="p">:</span>
</span><span id="line-38"><span class="linenos">38</span> <span class="n">role</span> <span class="o">=</span> <span class="s2">"user"</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">message</span><span class="p">,</span> <span class="n">HumanMessage</span><span class="p">)</span> <span class="k">else</span> <span class="s2">"assistant"</span>
</span><span id="line-39"><span class="linenos">39</span> <span class="n">content</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">content</span>
</span><span id="line-40"><span class="linenos">40</span> <span class="n">metadata</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">additional_kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"metadata"</span><span class="p">,</span> <span class="p">{})</span>
</span><span id="line-41"><span class="linenos">41</span> <span class="n">message_entry</span> <span class="o">=</span> <span class="p">{</span>
</span><span id="line-42"><span class="linenos">42</span> <span class="s2">"uuid"</span><span class="p">:</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"uuid"</span><span class="p">),</span>
</span><span id="line-43"><span class="linenos">43</span> <span class="s2">"timestamp"</span><span class="p">:</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"timestamp"</span><span class="p">),</span>
</span><span id="line-44"><span class="linenos">44</span> <span class="s2">"role"</span><span class="p">:</span> <span class="n">role</span><span class="p">,</span>
</span><span id="line-45"><span class="linenos">45</span> <span class="s2">"content"</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
</span><span id="line-46"><span class="linenos">46</span> <span class="s2">"intent_changed"</span><span class="p">:</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"intent_changed"</span><span class="p">,</span> <span class="kc">False</span><span class="p">),</span>
</span><span id="line-47"><span class="linenos">47</span> <span class="p">}</span>
</span><span id="line-48"><span class="linenos">48</span> <span class="n">messages_to_return</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">message_entry</span><span class="p">)</span>
</span><span id="line-49"><span class="linenos">49</span>
</span><span id="line-50"><span class="linenos">50</span> <span class="c1"># Prepare the response</span>
</span><span id="line-51"><span class="linenos">51</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span>
</span><span id="line-52"><span class="linenos">52</span> <span class="s2">"user_id"</span><span class="p">:</span> <span class="n">user_id</span><span class="p">,</span>
</span><span id="line-53"><span class="linenos">53</span> <span class="s2">"messages"</span><span class="p">:</span> <span class="n">messages_to_return</span><span class="p">,</span>
</span><span id="line-54"><span class="linenos">54</span> <span class="s2">"llm_response"</span><span class="p">:</span> <span class="n">llm_response</span><span class="p">,</span>
</span><span id="line-55"><span class="linenos">55</span> <span class="p">}</span>
</span><span id="line-56"><span class="linenos">56</span>
</span><span id="line-57"><span class="linenos">57</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
</span></code></pre></div>
</div>
</div>
@ -224,20 +247,20 @@ so that you can use the most relevant prompts for your retrieval and for prompti
</span><span id="line-10"><span class="linenos">10</span><span class="c1"># Global dictionary to keep track of user memories</span>
</span><span id="line-11"><span class="linenos">11</span><span class="n">user_memories</span> <span class="o">=</span> <span class="p">{}</span>
</span><span id="line-12"><span class="linenos">12</span>
</span><span id="line-13"><span class="linenos">13</span><span class="k">def</span> <span class="nf">get_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">):</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="sd">"""</span>
</span><span id="line-15"><span class="linenos">15</span><span class="sd"> Retrieve the user's conversation memory using LangChain.</span>
</span><span id="line-16"><span class="linenos">16</span><span class="sd"> If the user does not exist, initialize their conversation memory.</span>
</span><span id="line-17"><span class="linenos">17</span><span class="sd"> """</span>
</span><span id="line-18"><span class="linenos">18</span> <span class="k">if</span> <span class="n">user_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">user_memories</span><span class="p">:</span>
</span><span id="line-19"><span class="linenos">19</span> <span class="n">user_memories</span><span class="p">[</span><span class="n">user_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">ConversationBufferMemory</span><span class="p">(</span><span class="n">return_messages</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</span><span id="line-20"><span class="linenos">20</span> <span class="k">return</span> <span class="n">user_memories</span><span class="p">[</span><span class="n">user_id</span><span class="p">]</span>
</span><span id="line-21"><span class="linenos">21</span>
</span><span id="line-13"><span class="linenos">13</span>
</span><span id="line-14"><span class="linenos">14</span><span class="k">def</span> <span class="nf">get_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">):</span>
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="sd">"""</span>
</span><span id="line-16"><span class="linenos">16</span><span class="sd"> Retrieve the user's conversation memory using LangChain.</span>
</span><span id="line-17"><span class="linenos">17</span><span class="sd"> If the user does not exist, initialize their conversation memory.</span>
</span><span id="line-18"><span class="linenos">18</span><span class="sd"> """</span>
</span><span id="line-19"><span class="linenos">19</span> <span class="k">if</span> <span class="n">user_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">user_memories</span><span class="p">:</span>
</span><span id="line-20"><span class="linenos">20</span> <span class="n">user_memories</span><span class="p">[</span><span class="n">user_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">ConversationBufferMemory</span><span class="p">(</span><span class="n">return_messages</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</span><span id="line-21"><span class="linenos">21</span> <span class="k">return</span> <span class="n">user_memories</span><span class="p">[</span><span class="n">user_id</span><span class="p">]</span>
</span></code></pre></div>
</div>
</section>
<section id="step-2-update-conversationbuffermemory-w-intent">
<h3>Step 2: Update ConversationBufferMemory w/ intent<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-update-conversationbuffermemory-w-intent" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-update-conversationbuffermemory-w-intent'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<section id="step-2-update-conversationbuffermemory-with-intents">
<h3>Step 2: Update ConversationBufferMemory with Intents<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-update-conversationbuffermemory-with-intents" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-update-conversationbuffermemory-with-intents'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="k">def</span> <span class="nf">update_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">,</span> <span class="n">client_messages</span><span class="p">,</span> <span class="n">intent_changed</span><span class="p">):</span>
</span><span id="line-2"><span class="linenos"> 2</span><span class="w"> </span><span class="sd">"""</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="sd"> Update the user's conversation memory with new messages using LangChain.</span>
@ -253,26 +276,26 @@ so that you can use the most relevant prompts for your retrieval and for prompti
</span><span id="line-13"><span class="linenos">13</span>
</span><span id="line-14"><span class="linenos">14</span> <span class="c1"># Process each new message</span>
</span><span id="line-15"><span class="linenos">15</span> <span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">message</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">new_messages</span><span class="p">):</span>
</span><span id="line-16"><span class="linenos">16</span> <span class="n">role</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'role'</span><span class="p">)</span>
</span><span id="line-17"><span class="linenos">17</span> <span class="n">content</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'content'</span><span class="p">)</span>
</span><span id="line-16"><span class="linenos">16</span> <span class="n">role</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"role"</span><span class="p">)</span>
</span><span id="line-17"><span class="linenos">17</span> <span class="n">content</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"content"</span><span class="p">)</span>
</span><span id="line-18"><span class="linenos">18</span> <span class="n">metadata</span> <span class="o">=</span> <span class="p">{</span>
</span><span id="line-19"><span class="linenos">19</span> <span class="s1">'uuid'</span><span class="p">:</span> <span class="nb">str</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()),</span>
</span><span id="line-20"><span class="linenos">20</span> <span class="s1">'timestamp'</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">utcnow</span><span class="p">()</span><span class="o">.</span><span class="n">isoformat</span><span class="p">(),</span>
</span><span id="line-21"><span class="linenos">21</span> <span class="s1">'intent_changed'</span><span class="p">:</span> <span class="kc">False</span> <span class="c1"># Default value</span>
</span><span id="line-19"><span class="linenos">19</span> <span class="s2">"uuid"</span><span class="p">:</span> <span class="nb">str</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()),</span>
</span><span id="line-20"><span class="linenos">20</span> <span class="s2">"timestamp"</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">utcnow</span><span class="p">()</span><span class="o">.</span><span class="n">isoformat</span><span class="p">(),</span>
</span><span id="line-21"><span class="linenos">21</span> <span class="s2">"intent_changed"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="c1"># Default value</span>
</span><span id="line-22"><span class="linenos">22</span> <span class="p">}</span>
</span><span id="line-23"><span class="linenos">23</span>
</span><span id="line-24"><span class="linenos">24</span> <span class="c1"># Mark the intent change on the last message if detected</span>
</span><span id="line-25"><span class="linenos">25</span> <span class="k">if</span> <span class="n">intent_changed</span> <span class="ow">and</span> <span class="n">index</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">new_messages</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span>
</span><span id="line-26"><span class="linenos">26</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'intent_changed'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span>
</span><span id="line-26"><span class="linenos">26</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">"intent_changed"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span>
</span><span id="line-27"><span class="linenos">27</span>
</span><span id="line-28"><span class="linenos">28</span> <span class="c1"># Create a new message with metadata</span>
</span><span id="line-29"><span class="linenos">29</span> <span class="k">if</span> <span class="n">role</span> <span class="o">==</span> <span class="s1">'user'</span><span class="p">:</span>
</span><span id="line-29"><span class="linenos">29</span> <span class="k">if</span> <span class="n">role</span> <span class="o">==</span> <span class="s2">"user"</span><span class="p">:</span>
</span><span id="line-30"><span class="linenos">30</span> <span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">add_message</span><span class="p">(</span>
</span><span id="line-31"><span class="linenos">31</span> <span class="n">HumanMessage</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">content</span><span class="p">,</span> <span class="n">additional_kwargs</span><span class="o">=</span><span class="p">{</span><span class="s1">'metadata'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">})</span>
</span><span id="line-31"><span class="linenos">31</span> <span class="n">HumanMessage</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">content</span><span class="p">,</span> <span class="n">additional_kwargs</span><span class="o">=</span><span class="p">{</span><span class="s2">"metadata"</span><span class="p">:</span> <span class="n">metadata</span><span class="p">})</span>
</span><span id="line-32"><span class="linenos">32</span> <span class="p">)</span>
</span><span id="line-33"><span class="linenos">33</span> <span class="k">elif</span> <span class="n">role</span> <span class="o">==</span> <span class="s1">'assistant'</span><span class="p">:</span>
</span><span id="line-33"><span class="linenos">33</span> <span class="k">elif</span> <span class="n">role</span> <span class="o">==</span> <span class="s2">"assistant"</span><span class="p">:</span>
</span><span id="line-34"><span class="linenos">34</span> <span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">add_message</span><span class="p">(</span>
</span><span id="line-35"><span class="linenos">35</span> <span class="n">AIMessage</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">content</span><span class="p">,</span> <span class="n">additional_kwargs</span><span class="o">=</span><span class="p">{</span><span class="s1">'metadata'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">})</span>
</span><span id="line-35"><span class="linenos">35</span> <span class="n">AIMessage</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">content</span><span class="p">,</span> <span class="n">additional_kwargs</span><span class="o">=</span><span class="p">{</span><span class="s2">"metadata"</span><span class="p">:</span> <span class="n">metadata</span><span class="p">})</span>
</span><span id="line-36"><span class="linenos">36</span> <span class="p">)</span>
</span><span id="line-37"><span class="linenos">37</span> <span class="k">else</span><span class="p">:</span>
</span><span id="line-38"><span class="linenos">38</span> <span class="c1"># Handle other roles if necessary</span>
@ -292,11 +315,12 @@ so that you can use the most relevant prompts for your retrieval and for prompti
</span><span id="line-6"><span class="linenos"> 6</span> <span class="k">for</span> <span class="n">message</span> <span class="ow">in</span> <span class="nb">reversed</span><span class="p">(</span><span class="n">messages</span><span class="p">):</span>
</span><span id="line-7"><span class="linenos"> 7</span> <span class="c1"># Insert message at the beginning to maintain correct order</span>
</span><span id="line-8"><span class="linenos"> 8</span> <span class="n">messages_since_intent</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">message</span><span class="p">)</span>
</span><span id="line-9"><span class="linenos"> 9</span> <span class="n">metadata</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">additional_kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'metadata'</span><span class="p">,</span> <span class="p">{})</span>
</span><span id="line-9"><span class="linenos"> 9</span> <span class="n">metadata</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">additional_kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"metadata"</span><span class="p">,</span> <span class="p">{})</span>
</span><span id="line-10"><span class="linenos">10</span> <span class="c1"># Break if intent_changed is True</span>
</span><span id="line-11"><span class="linenos">11</span> <span class="k">if</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'intent_changed'</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> <span class="o">==</span> <span class="kc">True</span><span class="p">:</span>
</span><span id="line-11"><span class="linenos">11</span> <span class="k">if</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"intent_changed"</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> <span class="o">==</span> <span class="kc">True</span><span class="p">:</span>
</span><span id="line-12"><span class="linenos">12</span> <span class="k">break</span>
</span><span id="line-13"><span class="linenos">13</span> <span class="k">return</span> <span class="n">messages_since_intent</span>
</span><span id="line-13"><span class="linenos">13</span>
</span><span id="line-14"><span class="linenos">14</span> <span class="k">return</span> <span class="n">messages_since_intent</span>
</span></code></pre></div>
</div>
<p>You can use the last set of messages that match an intent to prompt an LLM, use it with a vector-DB for
@ -311,37 +335,31 @@ enabling Arch to retrieve critical information in a structured way for processin
retrieval quality and speed of your application. By extracting parameters from the conversation, you can pull
the appropriate chunks from a vector database or SQL-like data store to enhance accuracy. With Arch, you can
streamline data retrieval and processing to build more efficient and precise RAG applications.</p>
<section id="step-1-define-prompt-targets-with-parameter-definitions">
<h3>Step 1: Define prompt targets with parameter definitions<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-prompt-targets-with-parameter-definitions" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-prompt-targets-with-parameter-definitions'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<section id="step-1-define-prompt-targets">
<h3>Step 1: Define Prompt Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-prompt-targets" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-prompt-targets'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<div class="literal-block-wrapper docutils container" id="id2">
<div class="code-block-caption"><span class="caption-text">Prompt Targets</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-2"><span class="linenos"> 2</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">get_device_statistics</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">&gt;</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="no">This prompt target ensures that when users request device-related statistics, the system accurately retrieves and presents the relevant data</span>
</span><span id="line-5"><span class="linenos"> 5</span><span class="w"> </span><span class="no">based on the specified devices and time range. Examples of user prompts, include:</span>
</span><span id="line-6"><span class="linenos"> 6</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="no">- "Show me the performance stats for device 12345 over the past week."</span>
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="no">- "What are the error rates for my devices in the last 24 hours?"</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="w"> </span><span class="no">- "I need statistics on device 789 over the last 10 days."</span>
</span><span id="line-10"><span class="linenos">10</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_summary</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_ids"</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"A</span><span class="nv"> </span><span class="s">list</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">identifiers</span><span class="nv"> </span><span class="s">(IDs)</span><span class="nv"> </span><span class="s">for</span><span class="nv"> </span><span class="s">which</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">statistics</span><span class="nv"> </span><span class="s">are</span><span class="nv"> </span><span class="s">requested."</span>
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-17"><span class="linenos">17</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"time_range"</span>
</span><span id="line-18"><span class="linenos">18</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">integer</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
</span><span id="line-19"><span class="linenos">19</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"The</span><span class="nv"> </span><span class="s">number</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">days</span><span class="nv"> </span><span class="s">in</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">past</span><span class="nv"> </span><span class="s">over</span><span class="nv"> </span><span class="s">which</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">retrieve</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">statistics.</span><span class="nv"> </span><span class="s">Defaults</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">7</span><span class="nv"> </span><span class="s">days</span><span class="nv"> </span><span class="s">if</span><span class="nv"> </span><span class="s">not</span><span class="nv"> </span><span class="s">specified."</span>
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</span><span id="line-21"><span class="linenos">21</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">7</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Retrieve and present the relevant data based on the specified devices and time range</span>
</span><span id="line-4"><span class="linenos"> 4</span>
</span><span id="line-5"><span class="linenos"> 5</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_summary</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_ids</span>
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="w">        </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">A list of device identifiers (IDs) for which the statistics are requested.</span>
</span><span id="line-10"><span class="linenos">10</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">time_range</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">int</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">The number of days in the past over which to retrieve device statistics</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">7</span>
</span></code></pre></div>
</div>
</div>
</section>
<section id="step-2-process-request-parameters-in-flask">
<h3>Step 2: Process request parameters in Flask<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-process-request-parameters-in-flask" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-process-request-parameters-in-flask'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<h3>Step 2: Process Request Parameters in Flask<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-process-request-parameters-in-flask" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-process-request-parameters-in-flask'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>Once the prompt targets are configured as above, you can handle those parameters in your Flask application as shown below.</p>
<div class="literal-block-wrapper docutils container" id="id3">
<div class="code-block-caption"><span class="caption-text">Parameter handling with Flask</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
@ -349,43 +367,45 @@ streamline data retrieval and processing to build more efficient and precise RAG
</span><span id="line-2"><span class="linenos"> 2</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="n">app</span> <span class="o">=</span> <span class="n">Flask</span><span class="p">(</span><span class="vm">__name__</span><span class="p">)</span>
</span><span id="line-4"><span class="linenos"> 4</span>
</span><span id="line-5"><span class="linenos"> 5</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s1">'/agent/device_summary'</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s1">'POST'</span><span class="p">])</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="k">def</span> <span class="nf">get_device_summary</span><span class="p">():</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="sd">"""</span>
</span><span id="line-8"><span class="linenos"> 8</span><span class="sd"> Endpoint to retrieve device statistics based on device IDs and an optional time range.</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="sd"> """</span>
</span><span id="line-10"><span class="linenos">10</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
</span><span id="line-11"><span class="linenos">11</span>
</span><span id="line-12"><span class="linenos">12</span> <span class="c1"># Validate 'device_ids' parameter</span>
</span><span id="line-13"><span class="linenos">13</span> <span class="n">device_ids</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'device_ids'</span><span class="p">)</span>
</span><span id="line-14"><span class="linenos">14</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">device_ids</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">device_ids</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
</span><span id="line-15"><span class="linenos">15</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s2">"'device_ids' parameter is required and must be a list"</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-16"><span class="linenos">16</span>
</span><span id="line-17"><span class="linenos">17</span> <span class="c1"># Validate 'time_range' parameter (optional, defaults to 7)</span>
</span><span id="line-18"><span class="linenos">18</span> <span class="n">time_range</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'time_range'</span><span class="p">,</span> <span class="mi">7</span><span class="p">)</span>
</span><span id="line-19"><span class="linenos">19</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">time_range</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
</span><span id="line-20"><span class="linenos">20</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s2">"'time_range' must be an integer"</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-21"><span class="linenos">21</span>
</span><span id="line-22"><span class="linenos">22</span> <span class="c1"># Simulate retrieving statistics for the given device IDs and time range</span>
</span><span id="line-23"><span class="linenos">23</span> <span class="c1"># In a real application, you would query your database or external service here</span>
</span><span id="line-24"><span class="linenos">24</span> <span class="n">statistics</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="line-25"><span class="linenos">25</span> <span class="k">for</span> <span class="n">device_id</span> <span class="ow">in</span> <span class="n">device_ids</span><span class="p">:</span>
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># Placeholder for actual data retrieval</span>
</span><span id="line-27"><span class="linenos">27</span> <span class="n">stats</span> <span class="o">=</span> <span class="p">{</span>
</span><span id="line-28"><span class="linenos">28</span> <span class="s1">'device_id'</span><span class="p">:</span> <span class="n">device_id</span><span class="p">,</span>
</span><span id="line-29"><span class="linenos">29</span> <span class="s1">'time_range'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'Last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s1"> days'</span><span class="p">,</span>
</span><span id="line-30"><span class="linenos">30</span> <span class="s1">'data'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'Statistics data for device </span><span class="si">{</span><span class="n">device_id</span><span class="si">}</span><span class="s1"> over the last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s1"> days.'</span>
</span><span id="line-31"><span class="linenos">31</span> <span class="p">}</span>
</span><span id="line-32"><span class="linenos">32</span> <span class="n">statistics</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stats</span><span class="p">)</span>
</span><span id="line-33"><span class="linenos">33</span>
</span><span id="line-34"><span class="linenos">34</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span>
</span><span id="line-35"><span class="linenos">35</span> <span class="s1">'statistics'</span><span class="p">:</span> <span class="n">statistics</span>
</span><span id="line-36"><span class="linenos">36</span> <span class="p">}</span>
</span><span id="line-37"><span class="linenos">37</span>
</span><span id="line-38"><span class="linenos">38</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
</span><span id="line-39"><span class="linenos">39</span>
</span><span id="line-40"><span class="linenos">40</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
</span><span id="line-41"><span class="linenos">41</span> <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</span><span id="line-5"><span class="linenos"> 5</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s2">"/agent/device_summary"</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s2">"POST"</span><span class="p">])</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="k">def</span> <span class="nf">get_device_summary</span><span class="p">():</span>
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="sd">"""</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="sd"> Endpoint to retrieve device statistics based on device IDs and an optional time range.</span>
</span><span id="line-10"><span class="linenos">10</span><span class="sd"> """</span>
</span><span id="line-11"><span class="linenos">11</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
</span><span id="line-12"><span class="linenos">12</span>
</span><span id="line-13"><span class="linenos">13</span> <span class="c1"># Validate 'device_ids' parameter</span>
</span><span id="line-14"><span class="linenos">14</span> <span class="n">device_ids</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"device_ids"</span><span class="p">)</span>
</span><span id="line-15"><span class="linenos">15</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">device_ids</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">device_ids</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
</span><span id="line-16"><span class="linenos">16</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span>
</span><span id="line-17"><span class="linenos">17</span> <span class="p">{</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"'device_ids' parameter is required and must be a list"</span><span class="p">}</span>
</span><span id="line-18"><span class="linenos">18</span> <span class="p">),</span> <span class="mi">400</span>
</span><span id="line-19"><span class="linenos">19</span>
</span><span id="line-20"><span class="linenos">20</span> <span class="c1"># Validate 'time_range' parameter (optional, defaults to 7)</span>
</span><span id="line-21"><span class="linenos">21</span> <span class="n">time_range</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"time_range"</span><span class="p">,</span> <span class="mi">7</span><span class="p">)</span>
</span><span id="line-22"><span class="linenos">22</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">time_range</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
</span><span id="line-23"><span class="linenos">23</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"'time_range' must be an integer"</span><span class="p">}),</span> <span class="mi">400</span>
</span><span id="line-24"><span class="linenos">24</span>
</span><span id="line-25"><span class="linenos">25</span> <span class="c1"># Simulate retrieving statistics for the given device IDs and time range</span>
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># In a real application, you would query your database or external service here</span>
</span><span id="line-27"><span class="linenos">27</span> <span class="n">statistics</span> <span class="o">=</span> <span class="p">[]</span>
</span><span id="line-28"><span class="linenos">28</span> <span class="k">for</span> <span class="n">device_id</span> <span class="ow">in</span> <span class="n">device_ids</span><span class="p">:</span>
</span><span id="line-29"><span class="linenos">29</span> <span class="c1"># Placeholder for actual data retrieval</span>
</span><span id="line-30"><span class="linenos">30</span> <span class="n">stats</span> <span class="o">=</span> <span class="p">{</span>
</span><span id="line-31"><span class="linenos">31</span> <span class="s2">"device_id"</span><span class="p">:</span> <span class="n">device_id</span><span class="p">,</span>
</span><span id="line-32"><span class="linenos">32</span> <span class="s2">"time_range"</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"Last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s2"> days"</span><span class="p">,</span>
</span><span id="line-33"><span class="linenos">33</span> <span class="s2">"data"</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"Statistics data for device </span><span class="si">{</span><span class="n">device_id</span><span class="si">}</span><span class="s2"> over the last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s2"> days."</span><span class="p">,</span>
</span><span id="line-34"><span class="linenos">34</span> <span class="p">}</span>
</span><span id="line-35"><span class="linenos">35</span> <span class="n">statistics</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stats</span><span class="p">)</span>
</span><span id="line-36"><span class="linenos">36</span>
</span><span id="line-37"><span class="linenos">37</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"statistics"</span><span class="p">:</span> <span class="n">statistics</span><span class="p">}</span>
</span><span id="line-38"><span class="linenos">38</span>
</span><span id="line-39"><span class="linenos">39</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
</span><span id="line-40"><span class="linenos">40</span>
</span><span id="line-41"><span class="linenos">41</span>
</span><span id="line-42"><span class="linenos">42</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
</span><span id="line-43"><span class="linenos">43</span> <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</span></code></pre></div>
</div>
</div>
@ -414,13 +434,13 @@ streamline data retrieval and processing to build more efficient and precise RAG
<ul>
<li><a :data-current="activeSection === '#intent-drift-detection'" class="reference internal" href="#intent-drift-detection">Intent-drift Detection</a><ul>
<li><a :data-current="activeSection === '#step-1-define-conversationbuffermemory'" class="reference internal" href="#step-1-define-conversationbuffermemory">Step 1: Define ConversationBufferMemory</a></li>
<li><a :data-current="activeSection === '#step-2-update-conversationbuffermemory-w-intent'" class="reference internal" href="#step-2-update-conversationbuffermemory-w-intent">Step 2: Update ConversationBufferMemory w/ intent</a></li>
<li><a :data-current="activeSection === '#step-2-update-conversationbuffermemory-with-intents'" class="reference internal" href="#step-2-update-conversationbuffermemory-with-intents">Step 2: Update ConversationBufferMemory with Intents</a></li>
<li><a :data-current="activeSection === '#step-3-get-messages-based-on-latest-drift'" class="reference internal" href="#step-3-get-messages-based-on-latest-drift">Step 3: Get Messages based on latest drift</a></li>
</ul>
</li>
<li><a :data-current="activeSection === '#parameter-extraction-for-rag'" class="reference internal" href="#parameter-extraction-for-rag">Parameter Extraction for RAG</a><ul>
<li><a :data-current="activeSection === '#step-1-define-prompt-targets-with-parameter-definitions'" class="reference internal" href="#step-1-define-prompt-targets-with-parameter-definitions">Step 1: Define prompt targets with parameter definitions</a></li>
<li><a :data-current="activeSection === '#step-2-process-request-parameters-in-flask'" class="reference internal" href="#step-2-process-request-parameters-in-flask">Step 2: Process request parameters in Flask</a></li>
<li><a :data-current="activeSection === '#step-1-define-prompt-targets'" class="reference internal" href="#step-1-define-prompt-targets">Step 1: Define Prompt Targets</a></li>
<li><a :data-current="activeSection === '#step-2-process-request-parameters-in-flask'" class="reference internal" href="#step-2-process-request-parameters-in-flask">Step 2: Process Request Parameters in Flask</a></li>
</ul>
</li>
</ul>

View file

@ -19,7 +19,7 @@
<link href="../_static/favicon.ico" rel="icon"/>
<link href="../search.html" rel="search" title="Search"/>
<link href="prompt_target.html" rel="next" title="Prompt Target"/>
<link href="tech_overview/request_lifecycle.html" rel="prev" title="Request Lifecycle"/>
<link href="tech_overview/error_target.html" rel="prev" title="Error Target"/>
<script>
<!-- Prevent Flash of wrong theme -->
const userPreference = localStorage.getItem('darkMode');
@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -153,7 +153,7 @@
<div id="content" role="main">
<section id="llm-provider">
<span id="id1"></span><h1>LLM Provider<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#llm-provider"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<p><code class="docutils literal notranslate"><span class="pre">llm_provider</span></code> is a top-level primitive in Arch, helping developers centrally define, secure, observe,
<p><strong>LLM provider</strong> is a top-level primitive in Arch, helping developers centrally define, secure, observe,
and manage the usage of their LLMs. Arch builds on Envoy's reliable <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/upstream/cluster_manager" rel="nofollow noopener">cluster subsystem<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>
to manage egress traffic to LLMs, which includes intelligent routing, retry and fail-over mechanisms,
ensuring high availability and fault tolerance. This abstraction also enables developers to seamlessly
@ -162,7 +162,7 @@ across applications.</p>
<p>Below is an example of how you can configure <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> with an instance of an Arch gateway.</p>
<div class="literal-block-wrapper docutils container" id="id2">
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
</span><span id="line-2"><span class="linenos"> 2</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
@ -172,16 +172,15 @@ across applications.</p>
</span><span id="line-8"><span class="linenos"> 8</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
</span><span id="line-10"><mark><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
</mark></span><span id="line-11"><mark><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
</mark></span><span id="line-12"><mark><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
</mark></span><span id="line-13"><mark><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</mark></span><span id="line-11"><mark><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
</mark></span><span id="line-12"><mark><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
</mark></span><span id="line-13"><mark><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</mark></span><span id="line-14"><mark><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</mark></span><span id="line-15"><mark><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</mark></span><span id="line-16"><mark><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</mark></span><span id="line-17"><span class="linenos">17</span>
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span></code></pre></div>
</div>
</div>
@ -219,11 +218,11 @@ make outbound LLM calls.</p>
</section>
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
<div class="mr-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="tech_overview/request_lifecycle.html">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="tech_overview/error_target.html">
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="15 18 9 12 15 6"></polyline>
</svg>
Request Lifecycle
Error Target
</a>
</div>
<div class="ml-auto">

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -152,7 +152,7 @@
</nav>
<div id="content" role="main">
<section id="prompt-target">
<h1>Prompt Target<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt-target"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<span id="id1"></span><h1>Prompt Target<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt-target"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<p><strong>Prompt Targets</strong> are a fundamental component of Arch, enabling developers to define how different types of user prompts are processed and routed within their generative AI applications.
This section provides an in-depth look at prompt targets, including their purpose, configuration, usage, and best practices to help you effectively leverage this feature in your projects.</p>
<section id="what-are-prompt-targets">
@ -263,12 +263,13 @@ Here is a full list of parameter attributes that Arch can support:</p>
</span><span id="line-7"> <span class="nb">type</span><span class="p">:</span> <span class="nb">str</span>
</span><span id="line-8"> <span class="n">required</span><span class="p">:</span> <span class="n">true</span>
</span><span id="line-9"> <span class="o">-</span> <span class="n">name</span><span class="p">:</span> <span class="n">unit</span>
</span><span id="line-10"> <span class="n">description</span><span class="p">:</span> <span class="n">The</span> <span class="n">unit</span> <span class="n">of</span> <span class="n">temperature</span> <span class="n">to</span> <span class="k">return</span>
</span><span id="line-10"> <span class="n">description</span><span class="p">:</span> <span class="n">The</span> <span class="n">unit</span> <span class="n">of</span> <span class="n">temperature</span>
</span><span id="line-11"> <span class="nb">type</span><span class="p">:</span> <span class="nb">str</span>
</span><span id="line-12"> <span class="n">enum</span><span class="p">:</span> <span class="p">[</span><span class="s2">"celsius"</span><span class="p">,</span> <span class="s2">"fahrenheit"</span><span class="p">]</span>
</span><span id="line-13"> <span class="n">endpoint</span><span class="p">:</span>
</span><span id="line-14"> <span class="n">name</span><span class="p">:</span> <span class="n">api_server</span>
</span><span id="line-15"> <span class="n">path</span><span class="p">:</span> <span class="o">/</span><span class="n">weather</span>
</span><span id="line-12"> <span class="n">default</span><span class="p">:</span> <span class="n">fahrenheit</span>
</span><span id="line-13"> <span class="n">enum</span><span class="p">:</span> <span class="p">[</span><span class="n">celsius</span><span class="p">,</span> <span class="n">fahrenheit</span><span class="p">]</span>
</span><span id="line-14"> <span class="n">endpoint</span><span class="p">:</span>
</span><span id="line-15"> <span class="n">name</span><span class="p">:</span> <span class="n">api_server</span>
</span><span id="line-16"> <span class="n">path</span><span class="p">:</span> <span class="o">/</span><span class="n">weather</span>
</span></code></pre></div>
</div>
</section>

View file

@ -1,24 +1,25 @@
<!DOCTYPE html>
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' &amp;&amp; window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val =&gt; localStorage.setItem('darkMode', val))">
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' &amp;&amp; window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val =&gt; localStorage.setItem('darkMode', val))">
<head>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<meta charset="utf-8"/>
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>Error Targets | Arch Docs v0.1</title>
<meta content="Error Targets | Arch Docs v0.1" property="og:title"/>
<meta content="Error Targets | Arch Docs v0.1" name="twitter:title"/>
<link href="../_static/pygments.css?v=75ebff74" rel="stylesheet" type="text/css"/>
<link href="../_static/theme.css?v=edd7d3d2" rel="stylesheet" type="text/css"/>
<link href="../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
<link href="../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
<link href="../_static/awesome-sphinx-design.css?v=a54cf077" rel="stylesheet" type="text/css"/>
<link href="./docs/resources/error_target.html" rel="canonical"/>
<link href="../_static/favicon.ico" rel="icon"/>
<link href="../search.html" rel="search" title="Search"/>
<link href="configuration_reference.html" rel="prev" title="Configuration Reference"/>
<title>Error Target | Arch Docs v0.1</title>
<meta content="Error Target | Arch Docs v0.1" property="og:title"/>
<meta content="Error Target | Arch Docs v0.1" name="twitter:title"/>
<link href="../../_static/pygments.css?v=75ebff74" rel="stylesheet" type="text/css"/>
<link href="../../_static/theme.css?v=edd7d3d2" rel="stylesheet" type="text/css"/>
<link href="../../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
<link href="../../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
<link href="../../_static/awesome-sphinx-design.css?v=a54cf077" rel="stylesheet" type="text/css"/>
<link href="./docs/concepts/tech_overview/error_target.html" rel="canonical"/>
<link href="../../_static/favicon.ico" rel="icon"/>
<link href="../../search.html" rel="search" title="Search"/>
<link href="../llm_provider.html" rel="next" title="LLM Provider"/>
<link href="request_lifecycle.html" rel="prev" title="Request Lifecycle"/>
<script>
<!-- Prevent Flash of wrong theme -->
const userPreference = localStorage.getItem('darkMode');
@ -37,8 +38,8 @@
Skip to content
</a><header class="sticky top-0 z-40 w-full border-b shadow-sm border-border supports-backdrop-blur:bg-background/60 bg-background/95 backdrop-blur"><div class="container flex items-center h-14">
<div class="hidden mr-4 md:flex">
<a class="flex items-center mr-6" href="../index.html">
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.1</span>
<a class="flex items-center mr-6" href="../../index.html">
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.1</span>
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
@ -46,7 +47,7 @@
<span class="sr-only">Toggle navigation menu</span>
</button>
<div class="flex items-center justify-between flex-1 space-x-2 sm:space-x-4 md:justify-end">
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../search.html" class="relative flex items-center group" id="searchbox" method="get">
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../../search.html" class="relative flex items-center group" id="searchbox" method="get">
<input aria-label="Search the docs" class="inline-flex items-center font-medium transition-colors bg-transparent focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 ring-offset-background border border-input hover:bg-accent focus:bg-accent hover:text-accent-foreground focus:text-accent-foreground hover:placeholder-accent-foreground py-2 px-4 relative h-9 w-full justify-start rounded-[0.5rem] text-sm text-muted-foreground sm:pr-12 md:w-40 lg:w-64" id="search-input" name="q" placeholder="Search ..." type="search" x-ref="search"/>
<kbd class="pointer-events-none absolute right-1.5 top-2 hidden h-5 select-none text-muted-foreground items-center gap-1 rounded border border-border bg-muted px-1.5 font-mono text-[10px] font-medium opacity-100 sm:flex group-hover:bg-accent group-hover:text-accent-foreground">
<span class="text-xs"></span>
@ -73,8 +74,8 @@
</div>
</header>
<div class="flex-1"><div class="container flex-1 items-start md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full shrink-0 overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
<a class="!justify-start text-sm md:!hidden bg-background" href="../index.html">
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.1</span>
<a class="!justify-start text-sm md:!hidden bg-background" href="../../index.html">
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.1</span>
</a>
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0 h-[calc(100vh-8rem)] md:h-auto">
<div class="overflow-y-auto h-full w-full relative pr-6">
@ -90,44 +91,44 @@
<nav class="table w-full min-w-full my-6 lg:my-8">
<p class="caption" role="heading"><span class="caption-text">Get Started</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../get_started/overview.html">Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../get_started/intro_to_arch.html">Intro to Arch</a></li>
<li class="toctree-l1"><a class="reference internal" href="../get_started/quickstart.html">Quickstart</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../get_started/overview.html">Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../get_started/intro_to_arch.html">Intro to Arch</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html">Quickstart</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Concepts</span></p>
<ul>
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/tech_overview/tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<ul class="current">
<li class="toctree-l1 current" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul class="current" x-show="expanded">
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../guides/prompt_guard.html">Prompt Guard</a></li>
<li class="toctree-l1"><a class="reference internal" href="../guides/function_calling.html">Function Calling</a></li>
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../guides/observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
<li class="toctree-l2"><a class="reference internal" href="../guides/observability/tracing.html">Tracing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../guides/observability/monitoring.html">Monitoring</a></li>
<li class="toctree-l2"><a class="reference internal" href="../guides/observability/access_logging.html">Access Logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../guides/prompt_guard.html">Prompt Guard</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../guides/function_calling.html">Function Calling</a></li>
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../guides/observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/tracing.html">Tracing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/monitoring.html">Monitoring</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/access_logging.html">Access Logging</a></li>
</ul>
</li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Build with Arch</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/agent.html">Agentic Workflow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/rag.html">RAG Application</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/agent.html">Agentic Workflow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/rag.html">RAG Application</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Error Targets</a></li>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
</ul>
</nav>
</div>
@ -141,52 +142,53 @@
<main class="relative py-6 lg:gap-10 lg:py-8 xl:grid xl:grid-cols-[1fr_300px]">
<div class="w-full min-w-0 mx-auto">
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../index.html">
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../../index.html">
<span class="hidden md:inline">Arch Docs v0.1</span>
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
</svg>
</a>
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Error Targets</span>
<div class="mr-1">/</div><a class="hover:text-foreground overflow-hidden text-ellipsis whitespace-nowrap" href="tech_overview.html">Tech Overview</a>
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Error Target</span>
</nav>
<div id="content" role="main">
<section id="error-targets">
<span id="error-target"></span><h1>Error Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#error-targets"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<section id="error-target">
<span id="id1"></span><h1>Error Target<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#error-target"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<p><strong>Error targets</strong> are designed to capture and manage specific issues or exceptions that occur during Arch's function or system's execution.</p>
<p>These endpoints receive errors forwarded from Arch when issues arise, such as improper function/API calls, guardrail violations, or other processing errors.
The errors are communicated to the application via headers like <code class="docutils literal notranslate"><span class="pre">X-Arch-[ERROR-TYPE]</span></code>, enabling you to respond appropriately and handle errors gracefully.</p>
<section id="key-concepts">
<h2>Key Concepts<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#key-concepts" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#key-concepts'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<p><strong>Error Type</strong>: Categorizes the nature of the error, such as “ValidationError” or “RuntimeError.” These error types help in identifying what
kind of issue occurred and provide context for troubleshooting.</p>
<p><strong>Error Message</strong>: A clear, human-readable message describing the error. This should provide enough detail to inform users or developers of
the root cause or required action.</p>
<dl class="simple">
<dt><strong>Target Prompt</strong>: The specific prompt or operation where the error occurred. Understanding where the error happened helps with debugging</dt><dd><p>and pinpointing the source of the problem.</p>
</dd>
</dl>
<p><strong>Parameter-Specific Errors</strong>: Errors that arise due to invalid or missing parameters when invoking a function. These errors are critical
for ensuring the correctness of inputs.</p>
<ul class="simple">
<li><p><strong>Error Type</strong>: Categorizes the nature of the error, such as “ValidationError” or “RuntimeError.” These error types help in identifying what kind of issue occurred and provide context for troubleshooting.</p></li>
<li><p><strong>Error Message</strong>: A clear, human-readable message describing the error. This should provide enough detail to inform users or developers of the root cause or required action.</p></li>
<li><p><strong>Target Prompt</strong>: The specific prompt or operation where the error occurred. Understanding where the error happened helps with debugging and pinpointing the source of the problem.</p></li>
<li><p><strong>Parameter-Specific Errors</strong>: Errors that arise due to invalid or missing parameters when invoking a function. These errors are critical for ensuring the correctness of inputs.</p></li>
</ul>
</section>
<section id="error-header-example">
<h2>Error Header Example<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#error-header-example" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#error-header-example'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<div class="highlight-http notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kr">HTTP</span><span class="o">/</span><span class="m">1.1</span> <span class="m">400</span> <span class="ne">Bad Request</span>
</span><span id="line-2"><span class="na">X-Arch-Error-Type</span><span class="o">:</span> <span class="l">FunctionValidationError</span>
</span><span id="line-3"><span class="na">X-Arch-Error-Message</span><span class="o">:</span> <span class="l">Tools call parsing failure</span>
</span><span id="line-4"><span class="na">X-Arch-Target-Prompt</span><span class="o">:</span> <span class="l">createUser</span>
</span><span id="line-5"><span class="na">Content-Type</span><span class="o">:</span> <span class="l">application/json</span>
<div class="literal-block-wrapper docutils container" id="id2">
<div class="code-block-caption"><span class="caption-text">Error Header Example</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="w"> </span>HTTP/1.1<span class="w"> </span><span class="m">400</span><span class="w"> </span>Bad<span class="w"> </span>Request
</span><span id="line-2"><span class="w"> </span>X-Arch-Error-Type:<span class="w"> </span>FunctionValidationError
</span><span id="line-3"><span class="w"> </span>X-Arch-Error-Message:<span class="w"> </span>Tools<span class="w"> </span>call<span class="w"> </span>parsing<span class="w"> </span>failure
</span><span id="line-4"><span class="w"> </span>X-Arch-Target-Prompt:<span class="w"> </span>createUser
</span><span id="line-5"><span class="w"> </span>Content-Type:<span class="w"> </span>application/json
</span><span id="line-6">
</span><span id="line-7"><span class="nt">"messages"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
</span><span id="line-8"><span class="p">{</span>
</span><span id="line-9"><span class="w"> </span><span class="nt">"role"</span><span class="p">:</span><span class="w"> </span><span class="s2">"user"</span><span class="p">,</span>
</span><span id="line-10"><span class="w"> </span><span class="nt">"content"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Please create a user with the following ID: 1234"</span>
</span><span id="line-11"><span class="p">},</span>
</span><span id="line-12"><span class="p">{</span>
</span><span id="line-13"><span class="w"> </span><span class="nt">"role"</span><span class="p">:</span><span class="w"> </span><span class="s2">"system"</span><span class="p">,</span>
</span><span id="line-14"><span class="w"> </span><span class="nt">"content"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Expected a string for 'user_id', but got an integer."</span>
</span><span id="line-15"><span class="p">}]</span>
</span><span id="line-7"><span class="w"> </span><span class="s2">"messages"</span>:<span class="w"> </span><span class="o">[</span>
</span><span id="line-8"><span class="w"> </span><span class="o">{</span>
</span><span id="line-9"><span class="w"> </span><span class="s2">"role"</span>:<span class="w"> </span><span class="s2">"user"</span>,
</span><span id="line-10"><span class="w"> </span><span class="s2">"content"</span>:<span class="w"> </span><span class="s2">"Please create a user with the following ID: 1234"</span>
</span><span id="line-11"><span class="w"> </span><span class="o">}</span>,
</span><span id="line-12"><span class="w"> </span><span class="o">{</span>
</span><span id="line-13"><span class="w"> </span><span class="s2">"role"</span>:<span class="w"> </span><span class="s2">"system"</span>,
</span><span id="line-14"><span class="w"> </span><span class="s2">"content"</span>:<span class="w"> </span><span class="s2">"Expected a string for 'user_id', but got an integer."</span>
</span><span id="line-15"><span class="w"> </span><span class="o">}</span>
</span><span id="line-16"><span class="w"> </span><span class="o">]</span>
</span></code></pre></div>
</div>
</div>
</section>
<section id="best-practices-and-tips">
<h2>Best Practices and Tips<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#best-practices-and-tips" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#best-practices-and-tips'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
@ -199,13 +201,21 @@ for ensuring the correctness of inputs.</p>
</section>
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
<div class="mr-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="configuration_reference.html">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="request_lifecycle.html">
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="15 18 9 12 15 6"></polyline>
</svg>
Configuration Reference
Request Lifecycle
</a>
</div>
<div class="ml-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="../llm_provider.html">
LLM Provider
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</a>
</div>
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
<ul>
@ -225,10 +235,10 @@ for ensuring the correctness of inputs.</p>
</div>
</footer>
</div>
<script src="../_static/documentation_options.js?v=a0703c7e"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script defer="defer" src="../_static/theme.js?v=1808ab49"></script>
<script src="../_static/design-tabs.js?v=f930bc37"></script>
<script src="../../_static/documentation_options.js?v=a0703c7e"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script defer="defer" src="../../_static/theme.js?v=1808ab49"></script>
<script src="../../_static/design-tabs.js?v=f930bc37"></script>
</body>
</html>

View file

@ -18,7 +18,7 @@
<link href="./docs/concepts/tech_overview/listener.html" rel="canonical"/>
<link href="../../_static/favicon.ico" rel="icon"/>
<link href="../../search.html" rel="search" title="Search"/>
<link href="model_serving.html" rel="next" title="Model Serving"/>
<link href="prompt.html" rel="next" title="Prompts"/>
<link href="threading_model.html" rel="prev" title="Threading Model"/>
<script>
<!-- Prevent Flash of wrong theme -->
@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -154,7 +154,7 @@
<div id="content" role="main">
<section id="listener">
<span id="arch-overview-listeners"></span><h1>Listener<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#listener"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<p>Listener is a top level primitive in Arch, which simplifies the configuration required to bind incoming
<p><strong>Listener</strong> is a top level primitive in Arch, which simplifies the configuration required to bind incoming
connections from downstream clients, and for egress connections to LLMs (hosted or API)</p>
<p>Arch builds on Envoy's Listener subsystem to streamline connection management for developers. Arch minimizes
the complexity of Envoy's listener setup by using best-practices and exposing only essential settings,
@ -165,33 +165,33 @@ simplification ensures that connections are secure, reliable, and optimized for
<p>Developers can configure Arch to accept connections from downstream clients. A downstream listener acts as the
primary entry point for incoming traffic, handling initial connection setup, including network filtering, guardrails,
and additional network security checks. For more details on prompt security and safety,
see <a class="reference internal" href="prompt.html#arch-overview-prompt-handling"><span class="std std-ref">here</span></a></p>
see <a class="reference internal" href="prompt.html#arch-overview-prompt-handling"><span class="std std-ref">here</span></a>.</p>
</section>
<section id="upstream-egress">
<h2>Upstream (Egress)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#upstream-egress" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#upstream-egress'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<p>Arch automatically configures a listener to route requests from your application to upstream LLM API providers (or hosts).
When you start Arch, it creates a listener for egress traffic based on the presence of the <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> configuration
section in the <code class="docutils literal notranslate"><span class="pre">prompt_config.yml</span></code> file. Arch binds itself to a local address such as <code class="docutils literal notranslate"><span class="pre">127.0.0.1:9000/v1</span></code> or a DNS-based
address like <code class="docutils literal notranslate"><span class="pre">arch.local:9000/v1</span></code> for outgoing traffic. For more details on LLM providers, read <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">here</span></a></p>
When you start Arch, it creates a listener for egress traffic based on the presence of the <code class="docutils literal notranslate"><span class="pre">listener</span></code> configuration
section in the configuration file. Arch binds itself to a local address such as <code class="docutils literal notranslate"><span class="pre">127.0.0.1:9000/v1</span></code> or a DNS-based
address like <code class="docutils literal notranslate"><span class="pre">arch.local:9000/v1</span></code> for outgoing traffic. For more details on LLM providers, read <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">here</span></a>.</p>
</section>
<section id="configure-listener">
<h2>Configure Listener<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#configure-listener" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#configure-listener'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<p>To configure a Downstream (Ingress) Listner, simply add the <code class="docutils literal notranslate"><span class="pre">listener</span></code> directive to your <code class="docutils literal notranslate"><span class="pre">prompt_config.yml</span></code> file:</p>
<p>To configure a Downstream (Ingress) Listener, simply add the <code class="docutils literal notranslate"><span class="pre">listener</span></code> directive to your configuration file:</p>
<div class="literal-block-wrapper docutils container" id="id1">
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
</span><span id="line-2"><mark><span class="linenos"> 2</span>
</mark></span><span id="line-3"><mark><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
</span><span id="line-2"><span class="linenos"> 2</span>
</span><span id="line-3"><mark><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
</mark></span><span id="line-4"><mark><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
</mark></span><span id="line-5"><mark><span class="linenos"> 5</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
</mark></span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/pain Content-type in the http request</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
</span><span id="line-8"><span class="linenos"> 8</span>
</mark></span><span id="line-6"><mark><span class="linenos"> 6</span><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/plain Content-type in the http request</span>
</mark></span><span id="line-7"><mark><span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
</mark></span><span id="line-8"><span class="linenos"> 8</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
@ -212,8 +212,8 @@ address like <code class="docutils literal notranslate"><span class="pre">arch.l
</a>
</div>
<div class="ml-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="model_serving.html">
Model Serving
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="prompt.html">
Prompts
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>

View file

@ -18,8 +18,8 @@
<link href="./docs/concepts/tech_overview/model_serving.html" rel="canonical"/>
<link href="../../_static/favicon.ico" rel="icon"/>
<link href="../../search.html" rel="search" title="Search"/>
<link href="prompt.html" rel="next" title="Prompt"/>
<link href="listener.html" rel="prev" title="Listener"/>
<link href="request_lifecycle.html" rel="next" title="Request Lifecycle"/>
<link href="prompt.html" rel="prev" title="Prompts"/>
<script>
<!-- Prevent Flash of wrong theme -->
const userPreference = localStorage.getItem('darkMode');
@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -153,15 +153,14 @@
</nav>
<div id="content" role="main">
<section id="model-serving">
<span id="arch-model-serving"></span><h1>Model Serving<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-serving"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<p>Arch is a set of <strong>two</strong> self-contained processes that are designed to run alongside your application
<span id="id1"></span><h1>Model Serving<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-serving"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<p>Arch is a set of <cite>two</cite> self-contained processes that are designed to run alongside your application
servers (or on a separate host connected via a network). The first process is designated to manage low-level
networking and HTTP related comcerns, and the other process is for <strong>model serving</strong>, which helps Arch make
networking and HTTP-related concerns, and the other process is for model serving, which helps Arch make
intelligent decisions about the incoming prompts. The model server is designed to call the purpose-built
LLMs in Arch.</p>
<a class="reference internal image-reference" href="../../_images/arch-system-architecture.jpg"><img alt="../../_images/arch-system-architecture.jpg" class="align-center" src="../../_images/arch-system-architecture.jpg" style="width: 50%;"/>
<a class="reference internal image-reference" href="../../_images/arch-system-architecture.jpg"><img alt="../../_images/arch-system-architecture.jpg" class="align-center" src="../../_images/arch-system-architecture.jpg" style="width: 40%;"/>
</a>
<hr class="docutils"/>
<p>Arch is designed to be deployed in your cloud VPC, on an on-premises host, and can work on devices that don't
have a GPU. Note, GPU devices are needed for fast and cost-efficient use, so that Arch (model server, specifically)
can process prompts quickly and forward control back to the application host. There are three modes in which Arch
@ -176,11 +175,11 @@ might not be available.</p>
</div>
</section>
<section id="local-serving-gpu-fast">
<h2>Local Serving (GPU- Fast)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#local-serving-gpu-fast" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#local-serving-gpu-fast'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<h2>Local Serving (GPU - Fast)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#local-serving-gpu-fast" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#local-serving-gpu-fast'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<p>The following bash commands enable you to configure the model server subsystem in Arch to run locally on the
machine and utilize the GPU available for fast inference across all model use cases, including function calling,
guardrails, etc.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="gp">$ </span>archgw<span class="w"> </span>up<span class="w"> </span>--local
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="gp">$ </span>archgw<span class="w"> </span>up<span class="w"> </span>--local-gpu
</span></code></pre></div>
</div>
</section>
@ -202,16 +201,16 @@ how to generate API keys for model serving</p>
</section>
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
<div class="mr-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="listener.html">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="prompt.html">
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="15 18 9 12 15 6"></polyline>
</svg>
Listener
Prompts
</a>
</div>
<div class="ml-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="prompt.html">
Prompt
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="request_lifecycle.html">
Request Lifecycle
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
@ -221,7 +220,7 @@ how to generate API keys for model serving</p>
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
<ul>
<li><a :data-current="activeSection === '#local-serving-cpu-moderate'" class="reference internal" href="#local-serving-cpu-moderate">Local Serving (CPU - Moderate)</a></li>
<li><a :data-current="activeSection === '#local-serving-gpu-fast'" class="reference internal" href="#local-serving-gpu-fast">Local Serving (GPU- Fast)</a></li>
<li><a :data-current="activeSection === '#local-serving-gpu-fast'" class="reference internal" href="#local-serving-gpu-fast">Local Serving (GPU - Fast)</a></li>
<li><a :data-current="activeSection === '#cloud-serving-gpu-blazing-fast'" class="reference internal" href="#cloud-serving-gpu-blazing-fast">Cloud Serving (GPU - Blazing Fast)</a></li>
</ul>
</div>

View file

@ -7,9 +7,9 @@
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>Prompt | Arch Docs v0.1</title>
<meta content="Prompt | Arch Docs v0.1" property="og:title"/>
<meta content="Prompt | Arch Docs v0.1" name="twitter:title"/>
<title>Prompts | Arch Docs v0.1</title>
<meta content="Prompts | Arch Docs v0.1" property="og:title"/>
<meta content="Prompts | Arch Docs v0.1" name="twitter:title"/>
<link href="../../_static/pygments.css?v=75ebff74" rel="stylesheet" type="text/css"/>
<link href="../../_static/theme.css?v=edd7d3d2" rel="stylesheet" type="text/css"/>
<link href="../../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
@ -18,8 +18,8 @@
<link href="./docs/concepts/tech_overview/prompt.html" rel="canonical"/>
<link href="../../_static/favicon.ico" rel="icon"/>
<link href="../../search.html" rel="search" title="Search"/>
<link href="request_lifecycle.html" rel="next" title="Request Lifecycle"/>
<link href="model_serving.html" rel="prev" title="Model Serving"/>
<link href="model_serving.html" rel="next" title="Model Serving"/>
<link href="listener.html" rel="prev" title="Listener"/>
<script>
<!-- Prevent Flash of wrong theme -->
const userPreference = localStorage.getItem('darkMode');
@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -149,20 +149,19 @@
</svg>
</a>
<div class="mr-1">/</div><a class="hover:text-foreground overflow-hidden text-ellipsis whitespace-nowrap" href="tech_overview.html">Tech Overview</a>
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Prompt</span>
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Prompts</span>
</nav>
<div id="content" role="main">
<section id="prompt">
<span id="arch-overview-prompt-handling"></span><h1>Prompt<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<section id="prompts">
<span id="arch-overview-prompt-handling"></span><h1>Prompts<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompts"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<p>Arch's primary design point is to securely accept, process and handle prompts. To do that effectively,
Arch relies on Envoy's HTTP <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/http/http_connection_management" rel="nofollow noopener">connection management<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>,
subsystem and its <strong>prompt handler</strong> subsystem engineered with purpose-built LLMs to
implement critical functionality on behalf of developers so that you can stay focused on business logic.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Archs <strong>prompt handler</strong> subsystem interacts with the <strong>model</strong> subsytem through Envoys cluster manager
system to ensure robust, resilient and fault-tolerant experience in managing incoming prompts. Read more
about the <a class="reference internal" href="model_serving.html#arch-model-serving"><span class="std std-ref">model subsystem</span></a> and how the LLMs are hosted in Arch.</p>
<p>Arch's <strong>prompt handler</strong> subsystem interacts with the <strong>model subsystem</strong> through Envoy's cluster manager system to ensure a robust, resilient and fault-tolerant experience in managing incoming prompts.</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p>Read more about the <a class="reference internal" href="model_serving.html#model-serving"><span class="std std-ref">model subsystem</span></a> and how the LLMs are hosted in Arch.</p>
</div>
<section id="messages">
<h2>Messages<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#messages" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#messages'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
@ -176,15 +175,15 @@ containing two key-value pairs:</p>
</ul>
</div></blockquote>
</section>
<section id="prompt-guardrails">
<h2>Prompt Guardrails<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt-guardrails" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#prompt-guardrails'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<section id="prompt-guard">
<h2>Prompt Guard<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt-guard" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#prompt-guard'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<p>Arch is engineered with <a class="reference internal" href="../../guides/prompt_guard.html#prompt-guard"><span class="std std-ref">Arch-Guard</span></a>, an industry leading safety layer, powered by a
compact and high-performing LLM that monitors incoming prompts to detect and reject jailbreak attempts -
ensuring that unauthorized or harmful behaviors are intercepted early in the process.</p>
<p>To add jailbreak guardrails, see example below:</p>
<div class="literal-block-wrapper docutils container" id="id1">
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
</span><span id="line-2"><span class="linenos"> 2</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
@ -194,46 +193,27 @@ ensuring that unauthorized or harmful behaviors are intercepted early in the pro
</span><span id="line-8"><span class="linenos"> 8</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-17"><span class="linenos">17</span>
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-21"><span class="linenos">21</span>
</span><span id="line-22"><mark><span class="linenos">22</span><span class="nt">prompt_guards</span><span class="p">:</span>
</mark></span><span id="line-23"><mark><span class="linenos">23</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
</mark></span><span id="line-24"><mark><span class="linenos">24</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="s">"Looks</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">you're</span><span class="nv"> </span><span class="s">curious</span><span class="nv"> </span><span class="s">about</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">abilities,</span><span class="nv"> </span><span class="s">but</span><span class="nv"> </span><span class="s">I</span><span class="nv"> </span><span class="s">can</span><span class="nv"> </span><span class="s">only</span><span class="nv"> </span><span class="s">provide</span><span class="nv"> </span><span class="s">assistance</span><span class="nv"> </span><span class="s">within</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">programmed</span><span class="nv"> </span><span class="s">parameters."</span>
</mark></span><span id="line-27"><span class="linenos">27</span>
</span><span id="line-28"><span class="linenos">28</span><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-29"><span class="linenos">29</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"reboot_network_device"</span>
</span><span id="line-30"><span class="linenos">30</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Helps</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">operators</span><span class="nv"> </span><span class="s">perform</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">operations</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">rebooting</span><span class="nv"> </span><span class="s">a</span><span class="nv"> </span><span class="s">device."</span>
</span><span id="line-31"><span class="linenos">31</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-32"><span class="linenos">32</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</span><span id="line-33"><span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/action"</span>
</span><span id="line-34"><span class="linenos">34</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</span><span id="line-35"><span class="linenos">35</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_id"</span>
</span><span id="line-36"><span class="linenos">36</span><span class="w"> </span><span class="c1"># additional type options include: int | float | bool | string | list | dict</span>
</span><span id="line-37"><span class="linenos">37</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
</span><span id="line-38"><span class="linenos">38</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Identifier</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</span><span id="line-39"><span class="linenos">39</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-40"><span class="linenos">40</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"confirmation"</span>
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Confirmation</span><span class="nv"> </span><span class="s">flag</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">proceed</span><span class="nv"> </span><span class="s">with</span><span class="nv"> </span><span class="s">reboot."</span>
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="s">"no"</span>
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">yes</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">no</span><span class="p p-Indicator">]</span>
</span></code></pre></div>
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-20"><span class="linenos">20</span>
</span><span id="line-21"><mark><span class="linenos">21</span><span class="nt">prompt_guards</span><span class="p">:</span>
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
</mark></span><span id="line-23"><mark><span class="linenos">23</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
</mark></span><span id="line-24"><mark><span class="linenos">24</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
</mark></span></code></pre></div>
</div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>As a roadmap item, Arch will expose the ability for developers to define custom guardrails via Arch-Guard-v2,
<p>As a roadmap item, Arch will expose the ability for developers to define custom guardrails via Arch-Guard,
and add support for additional safety checks defined by developers and hazardous categories like violent crimes, privacy, hate,
etc. To offer feedback on our roadmap, please visit our <a class="reference external" href="https://github.com/orgs/katanemo/projects/1" rel="nofollow noopener">github page<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a></p>
</div>
@ -247,7 +227,7 @@ when a users intent has changed so that you can build faster, more accurate R
<p>Configuring <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> is simple. See example below:</p>
<div class="literal-block-wrapper docutils container" id="id2">
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
</span><span id="line-2"><span class="linenos"> 2</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
@ -257,70 +237,71 @@ when a users intent has changed so that you can build faster, more accurate R
</span><span id="line-8"><span class="linenos"> 8</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-17"><span class="linenos">17</span>
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-21"><span class="linenos">21</span>
</span><span id="line-22"><span class="linenos">22</span><span class="nt">prompt_guards</span><span class="p">:</span>
</span><span id="line-23"><span class="linenos">23</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
</span><span id="line-24"><span class="linenos">24</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
</span><span id="line-25"><span class="linenos">25</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
</span><span id="line-26"><span class="linenos">26</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="s">"Looks</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">you're</span><span class="nv"> </span><span class="s">curious</span><span class="nv"> </span><span class="s">about</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">abilities,</span><span class="nv"> </span><span class="s">but</span><span class="nv"> </span><span class="s">I</span><span class="nv"> </span><span class="s">can</span><span class="nv"> </span><span class="s">only</span><span class="nv"> </span><span class="s">provide</span><span class="nv"> </span><span class="s">assistance</span><span class="nv"> </span><span class="s">within</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">programmed</span><span class="nv"> </span><span class="s">parameters."</span>
</span><span id="line-27"><span class="linenos">27</span>
</span><span id="line-28"><span class="linenos">28</span><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"reboot_network_device"</span>
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Helps</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">operators</span><span class="nv"> </span><span class="s">perform</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">operations</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">rebooting</span><span class="nv"> </span><span class="s">a</span><span class="nv"> </span><span class="s">device."</span>
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/action"</span>
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</mark></span><span id="line-35"><mark><span class="linenos">35</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_id"</span>
</mark></span><span id="line-36"><mark><span class="linenos">36</span><span class="w"> </span><span class="c1"># additional type options include: int | float | bool | string | list | dict</span>
</mark></span><span id="line-37"><mark><span class="linenos">37</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
</mark></span><span id="line-38"><mark><span class="linenos">38</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Identifier</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</mark></span><span id="line-39"><span class="linenos">39</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-40"><span class="linenos">40</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"confirmation"</span>
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Confirmation</span><span class="nv"> </span><span class="s">flag</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">proceed</span><span class="nv"> </span><span class="s">with</span><span class="nv"> </span><span class="s">reboot."</span>
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="s">"no"</span>
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">yes</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">no</span><span class="p p-Indicator">]</span>
</span><span id="line-45"><span class="linenos">45</span>
</span><span id="line-46"><span class="linenos">46</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"information_extraction"</span>
</span><span id="line-47"><span class="linenos">47</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-48"><span class="linenos">48</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"This</span><span class="nv"> </span><span class="s">prompt</span><span class="nv"> </span><span class="s">handles</span><span class="nv"> </span><span class="s">all</span><span class="nv"> </span><span class="s">scenarios</span><span class="nv"> </span><span class="s">that</span><span class="nv"> </span><span class="s">are</span><span class="nv"> </span><span class="s">question</span><span class="nv"> </span><span class="s">and</span><span class="nv"> </span><span class="s">answer</span><span class="nv"> </span><span class="s">in</span><span class="nv"> </span><span class="s">nature.</span><span class="nv"> </span><span class="s">Like</span><span class="nv"> </span><span class="s">summarization,</span><span class="nv"> </span><span class="s">information</span><span class="nv"> </span><span class="s">extraction,</span><span class="nv"> </span><span class="s">etc."</span>
</span><span id="line-49"><span class="linenos">49</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-50"><span class="linenos">50</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</span><span id="line-51"><span class="linenos">51</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/summary"</span>
</span><span id="line-52"><span class="linenos">52</span><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
</span><span id="line-53"><span class="linenos">53</span><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-54"><span class="linenos">54</span><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
</span><span id="line-55"><span class="linenos">55</span><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-56"><span class="linenos">56</span><span class="w"> </span><span class="no">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
</span><span id="line-57"><span class="linenos">57</span>
</span><span id="line-58"><span class="linenos">58</span><span class="nt">error_target</span><span class="p">:</span>
</span><span id="line-59"><span class="linenos">59</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-60"><span class="linenos">60</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
</span><span id="line-61"><span class="linenos">61</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
</span><span id="line-62"><span class="linenos">62</span>
</span><span id="line-63"><span class="linenos">63</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-64"><span class="linenos">64</span><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-65"><span class="linenos">65</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-66"><span class="linenos">66</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-67"><span class="linenos">67</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-68"><span class="linenos">68</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-69"><span class="linenos">69</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
</span><span id="line-70"><span class="linenos">70</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-71"><span class="linenos">71</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-20"><span class="linenos">20</span>
</span><span id="line-21"><span class="linenos">21</span><span class="nt">prompt_guards</span><span class="p">:</span>
</span><span id="line-22"><span class="linenos">22</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
</span><span id="line-23"><span class="linenos">23</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
</span><span id="line-24"><span class="linenos">24</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
</span><span id="line-25"><span class="linenos">25</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
</span><span id="line-26"><span class="linenos">26</span>
</span><span id="line-27"><span class="linenos">27</span><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-28"><span class="linenos">28</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
</span><span id="line-29"><span class="linenos">29</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-30"><span class="linenos">30</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">handle all scenarios that are question and answer in nature. Like summarization, information extraction, etc.</span>
</span><span id="line-31"><span class="linenos">31</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-32"><span class="linenos">32</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</span><span id="line-33"><span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
</span><span id="line-34"><span class="linenos">34</span><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
</span><span id="line-35"><span class="linenos">35</span><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-36"><span class="linenos">36</span><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
</span><span id="line-37"><span class="linenos">37</span><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
</span><span id="line-38"><span class="linenos">38</span>
</span><span id="line-39"><mark><span class="linenos">39</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
</mark></span><span id="line-40"><mark><span class="linenos">40</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot a specific network device</span>
</mark></span><span id="line-41"><mark><span class="linenos">41</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</mark></span><span id="line-42"><mark><span class="linenos">42</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</mark></span><span id="line-43"><mark><span class="linenos">43</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
</mark></span><span id="line-44"><mark><span class="linenos">44</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</mark></span><span id="line-45"><mark><span class="linenos">45</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_id</span>
</mark></span><span id="line-46"><mark><span class="linenos">46</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
</mark></span><span id="line-47"><mark><span class="linenos">47</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Identifier of the network device to reboot.</span>
</mark></span><span id="line-48"><mark><span class="linenos">48</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</mark></span><span id="line-49"><mark><span class="linenos">49</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">confirmation</span>
</mark></span><span id="line-50"><mark><span class="linenos">50</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">bool</span>
</mark></span><span id="line-51"><mark><span class="linenos">51</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Confirmation flag to proceed with reboot.</span>
</mark></span><span id="line-52"><mark><span class="linenos">52</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</mark></span><span id="line-53"><mark><span class="linenos">53</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">true</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">false</span><span class="p p-Indicator">]</span>
</mark></span><span id="line-54"><span class="linenos">54</span>
</span><span id="line-55"><span class="linenos">55</span><span class="nt">error_target</span><span class="p">:</span>
</span><span id="line-56"><span class="linenos">56</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-57"><span class="linenos">57</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
</span><span id="line-58"><span class="linenos">58</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
</span><span id="line-59"><span class="linenos">59</span>
</span><span id="line-60"><span class="linenos">60</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-61"><span class="linenos">61</span><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-62"><span class="linenos">62</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-63"><span class="linenos">63</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-64"><span class="linenos">64</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-65"><span class="linenos">65</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-66"><span class="linenos">66</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
</span><span id="line-67"><span class="linenos">67</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-68"><span class="linenos">68</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span></code></pre></div>
</div>
</div>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p>Check <a class="reference internal" href="../prompt_target.html#prompt-target"><span class="std std-ref">Prompt Target</span></a> for more details!</p>
</div>
<section id="intent-detection-and-prompt-matching">
<h3>Intent Detection and Prompt Matching:<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#intent-detection-and-prompt-matching" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#intent-detection-and-prompt-matching'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>Arch uses fast Natural Language Inference (NLI) and embedding approaches to first detect the intent of each
@ -382,28 +363,24 @@ traffic, apply rate limits, and utilize a large set of traffic management capabi
</span><span id="line-14"><span class="nb">print</span><span class="p">(</span><span class="s2">"OpenAI Response:"</span><span class="p">,</span> <span class="n">response</span><span class="o">.</span><span class="n">choices</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
</span></code></pre></div>
</div>
<p>In these examples:</p>
<blockquote>
<div><p>The OpenAI client is used to send traffic directly through the Arch egress proxy to the LLM of your choice, such as OpenAI.
The OpenAI client is configured to route traffic via Arch by setting the proxy to 127.0.0.1:51001, assuming Arch is
running locally and bound to that address and port.</p>
</div></blockquote>
<p>This setup allows you to take advantage of Arch's advanced traffic management features while interacting with LLM APIs like OpenAI.</p>
<p>In these examples, the OpenAI client is used to send traffic directly through the Arch egress proxy to the LLM of your choice, such as OpenAI.
The OpenAI client is configured to route traffic via Arch by setting the proxy to <code class="docutils literal notranslate"><span class="pre">127.0.0.1:51001</span></code>, assuming Arch is running locally and bound to that address and port.
This setup allows you to take advantage of Arch's advanced traffic management features while interacting with LLM APIs like OpenAI.</p>
</section>
</section>
</section>
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
<div class="mr-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="model_serving.html">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="listener.html">
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="15 18 9 12 15 6"></polyline>
</svg>
Model Serving
Listener
</a>
</div>
<div class="ml-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="request_lifecycle.html">
Request Lifecycle
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="model_serving.html">
Model Serving
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
@ -413,7 +390,7 @@ running locally and bound to that address and port.</p>
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
<ul>
<li><a :data-current="activeSection === '#messages'" class="reference internal" href="#messages">Messages</a></li>
<li><a :data-current="activeSection === '#prompt-guardrails'" class="reference internal" href="#prompt-guardrails">Prompt Guardrails</a></li>
<li><a :data-current="activeSection === '#prompt-guard'" class="reference internal" href="#prompt-guard">Prompt Guard</a></li>
<li><a :data-current="activeSection === '#prompt-targets'" class="reference internal" href="#prompt-targets">Prompt Targets</a><ul>
<li><a :data-current="activeSection === '#intent-detection-and-prompt-matching'" class="reference internal" href="#intent-detection-and-prompt-matching">Intent Detection and Prompt Matching:</a></li>
<li><a :data-current="activeSection === '#agentic-apps-via-prompt-targets'" class="reference internal" href="#agentic-apps-via-prompt-targets">Agentic Apps via Prompt Targets</a></li>

View file

@ -18,8 +18,8 @@
<link href="./docs/concepts/tech_overview/request_lifecycle.html" rel="canonical"/>
<link href="../../_static/favicon.ico" rel="icon"/>
<link href="../../search.html" rel="search" title="Search"/>
<link href="../llm_provider.html" rel="next" title="LLM Provider"/>
<link href="prompt.html" rel="prev" title="Prompt"/>
<link href="error_target.html" rel="next" title="Error Target"/>
<link href="model_serving.html" rel="prev" title="Model Serving"/>
<script>
<!-- Prevent Flash of wrong theme -->
const userPreference = localStorage.getItem('darkMode');
@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -199,7 +199,7 @@ lifecycle. The downstream and upstream HTTP/2 codec lives here.</p></li>
forwarding prompts <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> and establishes the lifecycle of any <strong>upstream</strong> connection to a
hosted endpoint that implements domain-specific business logic for incoming prompts. This is where knowledge
of targets and endpoint health, load balancing and connection pooling exists.</p></li>
<li><p><a class="reference internal" href="model_serving.html#arch-model-serving"><span class="std std-ref">Model serving subsystem</span></a> which helps Arch make intelligent decisions about the
<li><p><a class="reference internal" href="model_serving.html#model-serving"><span class="std std-ref">Model serving subsystem</span></a> which helps Arch make intelligent decisions about the
incoming prompts. The model server is designed to call the purpose-built LLMs in Arch.</p></li>
</ul>
<p>The three subsystems are bridged by the HTTP router filter and the cluster manager subsystems of Envoy.</p>
@ -214,7 +214,7 @@ enables scaling to very high core count CPUs.</p>
<section id="configuration">
<h2>Configuration<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#configuration" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#configuration'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<p>Today, Arch only supports a static bootstrap configuration file, for simplicity:</p>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
</span><span id="line-2">
</span><span id="line-3"><span class="nt">listener</span><span class="p">:</span>
</span><span id="line-4"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
@ -224,67 +224,64 @@ enables scaling to very high core count CPUs.</p>
</span><span id="line-8">
</span><span id="line-9"><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
</span><span id="line-10"><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-11"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
</span><span id="line-12"><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-11"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
</span><span id="line-12"><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-14"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-15"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-16"><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-17">
</span><span id="line-18"><span class="c1"># default system prompt used by all prompt targets</span>
</span><span id="line-19"><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-20"><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-21">
</span><span id="line-22"><span class="nt">prompt_guards</span><span class="p">:</span>
</span><span id="line-23"><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
</span><span id="line-24"><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
</span><span id="line-25"><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
</span><span id="line-26"><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="s">"Looks</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">you're</span><span class="nv"> </span><span class="s">curious</span><span class="nv"> </span><span class="s">about</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">abilities,</span><span class="nv"> </span><span class="s">but</span><span class="nv"> </span><span class="s">I</span><span class="nv"> </span><span class="s">can</span><span class="nv"> </span><span class="s">only</span><span class="nv"> </span><span class="s">provide</span><span class="nv"> </span><span class="s">assistance</span><span class="nv"> </span><span class="s">within</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">programmed</span><span class="nv"> </span><span class="s">parameters."</span>
</span><span id="line-27">
</span><span id="line-28"><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-29"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"reboot_network_device"</span>
</span><span id="line-30"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Helps</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">operators</span><span class="nv"> </span><span class="s">perform</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">operations</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">rebooting</span><span class="nv"> </span><span class="s">a</span><span class="nv"> </span><span class="s">device."</span>
</span><span id="line-19"><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-20">
</span><span id="line-21"><span class="nt">prompt_guards</span><span class="p">:</span>
</span><span id="line-22"><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
</span><span id="line-23"><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
</span><span id="line-24"><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
</span><span id="line-25"><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
</span><span id="line-26">
</span><span id="line-27"><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-28"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
</span><span id="line-29"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-30"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">handle all scenarios that are question and answer in nature. Like summarization, information extraction, etc.</span>
</span><span id="line-31"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-32"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</span><span id="line-33"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/action"</span>
</span><span id="line-34"><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</span><span id="line-35"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_id"</span>
</span><span id="line-36"><span class="w"> </span><span class="c1"># additional type options include: int | float | bool | string | list | dict</span>
</span><span id="line-37"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
</span><span id="line-38"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Identifier</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</span><span id="line-39"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-40"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"confirmation"</span>
</span><span id="line-41"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
</span><span id="line-42"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Confirmation</span><span class="nv"> </span><span class="s">flag</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">proceed</span><span class="nv"> </span><span class="s">with</span><span class="nv"> </span><span class="s">reboot."</span>
</span><span id="line-43"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="s">"no"</span>
</span><span id="line-44"><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">yes</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">no</span><span class="p p-Indicator">]</span>
</span><span id="line-45">
</span><span id="line-46"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"information_extraction"</span>
</span><span id="line-47"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-48"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"This</span><span class="nv"> </span><span class="s">prompt</span><span class="nv"> </span><span class="s">handles</span><span class="nv"> </span><span class="s">all</span><span class="nv"> </span><span class="s">scenarios</span><span class="nv"> </span><span class="s">that</span><span class="nv"> </span><span class="s">are</span><span class="nv"> </span><span class="s">question</span><span class="nv"> </span><span class="s">and</span><span class="nv"> </span><span class="s">answer</span><span class="nv"> </span><span class="s">in</span><span class="nv"> </span><span class="s">nature.</span><span class="nv"> </span><span class="s">Like</span><span class="nv"> </span><span class="s">summarization,</span><span class="nv"> </span><span class="s">information</span><span class="nv"> </span><span class="s">extraction,</span><span class="nv"> </span><span class="s">etc."</span>
</span><span id="line-49"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-50"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</span><span id="line-51"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/summary"</span>
</span><span id="line-52"><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
</span><span id="line-53"><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-54"><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
</span><span id="line-55"><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-56"><span class="w"> </span><span class="no">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
</span><span id="line-57">
</span><span id="line-58"><span class="nt">error_target</span><span class="p">:</span>
</span><span id="line-59"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-60"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
</span><span id="line-61"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
</span><span id="line-62">
</span><span id="line-63"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-64"><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-65"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-66"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-67"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-68"><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-69"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
</span><span id="line-70"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-71"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span><span id="line-33"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
</span><span id="line-34"><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
</span><span id="line-35"><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-36"><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
</span><span id="line-37"><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
</span><span id="line-38">
</span><span id="line-39"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
</span><span id="line-40"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot a specific network device</span>
</span><span id="line-41"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-42"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</span><span id="line-43"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
</span><span id="line-44"><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</span><span id="line-45"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_id</span>
</span><span id="line-46"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
</span><span id="line-47"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Identifier of the network device to reboot.</span>
</span><span id="line-48"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-49"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">confirmation</span>
</span><span id="line-50"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">bool</span>
</span><span id="line-51"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Confirmation flag to proceed with reboot.</span>
</span><span id="line-52"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</span><span id="line-53"><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">true</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">false</span><span class="p p-Indicator">]</span>
</span><span id="line-54">
</span><span id="line-55"><span class="nt">error_target</span><span class="p">:</span>
</span><span id="line-56"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-57"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
</span><span id="line-58"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
</span><span id="line-59">
</span><span id="line-60"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-61"><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-62"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-63"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-64"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-65"><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-66"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
</span><span id="line-67"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-68"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span></code></pre></div>
</div>
</section>
@ -374,16 +371,16 @@ processing request headers and then finalized by the HCM during post-request pro
</section>
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
<div class="mr-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="prompt.html">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="model_serving.html">
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="15 18 9 12 15 6"></polyline>
</svg>
Prompt
Model Serving
</a>
</div>
<div class="ml-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="../llm_provider.html">
LLM Provider
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="error_target.html">
Error Target
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -163,19 +163,19 @@
<li class="toctree-l2"><a class="reference internal" href="listener.html#configure-listener">Configure Listener</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="model_serving.html">Model Serving</a><ul>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#local-serving-cpu-moderate">Local Serving (CPU - Moderate)</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#local-serving-gpu-fast">Local Serving (GPU- Fast)</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#cloud-serving-gpu-blazing-fast">Cloud Serving (GPU - Blazing Fast)</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="prompt.html">Prompt</a><ul>
<li class="toctree-l1"><a class="reference internal" href="prompt.html">Prompts</a><ul>
<li class="toctree-l2"><a class="reference internal" href="prompt.html#messages">Messages</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html#prompt-guardrails">Prompt Guardrails</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html#prompt-guard">Prompt Guard</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html#prompt-targets">Prompt Targets</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html#prompting-llms">Prompting LLMs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="model_serving.html">Model Serving</a><ul>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#local-serving-cpu-moderate">Local Serving (CPU - Moderate)</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#local-serving-gpu-fast">Local Serving (GPU - Fast)</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#cloud-serving-gpu-blazing-fast">Cloud Serving (GPU - Blazing Fast)</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a><ul>
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html#terminology">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html#network-topology">Network topology</a></li>
@ -186,6 +186,12 @@
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html#id1">Overview</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="error_target.html">Error Target</a><ul>
<li class="toctree-l2"><a class="reference internal" href="error_target.html#key-concepts">Key Concepts</a></li>
<li class="toctree-l2"><a class="reference internal" href="error_target.html#error-header-example">Error Header Example</a></li>
<li class="toctree-l2"><a class="reference internal" href="error_target.html#best-practices-and-tips">Best Practices and Tips</a></li>
</ul>
</li>
</ul>
</div>
</section>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2 current"><a class="current reference internal" href="#">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -160,31 +160,31 @@ to keep things consistent in logs, traces and in code.</p>
<p><strong>Upstream(Egress)</strong>: An upstream host that receives connections and prompts from Arch, and returns context or responses for a prompt</p>
<a class="reference internal image-reference" href="../../_images/network-topology-ingress-egress.jpg"><img alt="../../_images/network-topology-ingress-egress.jpg" class="align-center" src="../../_images/network-topology-ingress-egress.jpg" style="width: 100%;"/>
</a>
<p><strong>Listener</strong>: A listener is a named network location (e.g., port, address, path etc.) that Arch listens on to process prompts
<p><strong>Listener</strong>: A <a class="reference internal" href="listener.html#arch-overview-listeners"><span class="std std-ref">listener</span></a> is a named network location (e.g., port, address, path etc.) that Arch listens on to process prompts
before forwarding them to your application server endpoints. Arch enables you to configure one listener for downstream connections
(like port 80, 443) and creates a separate internal listener for calls that initiate from your application code to LLMs.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>When you start Arch, you specify a listener address/port that you want to bind downstream. But, Arch uses a predefined port
that you can use (<code class="docutils literal notranslate"><span class="pre">127.0.0.1:10000</span></code>) to proxy egress calls originating from your application to LLMs (API-based or hosted).
For more details, check out <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">LLM providers</span></a></p>
For more details, check out <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">LLM provider</span></a>.</p>
</div>
<p><strong>Instance</strong>: An instance of the Arch gateway. When you start Arch it creates at most two processes. One to handle Layer 7
networking operations (auth, tls, observability, etc) and the second process to serve models that enable it to make smart
decisions on how to accept, handle and forward prompts. The second process is optional, as the model serving service could be
hosted on a different network (an API call). But these two processes are considered a single instance of Arch.</p>
<p><strong>Prompt Targets</strong>: Arch offers a primitive called <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> to help separate business logic from undifferentiated
<p><strong>Prompt Target</strong>: Arch offers a primitive called <a class="reference internal" href="../prompt_target.html#prompt-target"><span class="std std-ref">prompt_target</span></a> to help separate business logic from undifferentiated
work in building generative AI apps. Prompt targets are endpoints that receive prompts that are processed by Arch.
For example, Arch enriches incoming prompts with metadata like knowing when a request is a follow-up or clarifying prompt
so that you can build faster, more accurate retrieval (RAG) apps. To support agentic apps, like scheduling travel plans or
sharing comments on a document - via prompts, Arch uses its function calling abilities to extract critical information from
the incoming prompt (or a set of prompts) needed by a downstream backend API or function call before calling it directly.</p>
<p><strong>Error Targets</strong>: Error targets are those endpoints that receive forwarded errors from Arch when issues arise,
<p><strong>Error Target</strong>: <a class="reference internal" href="error_target.html#error-target"><span class="std std-ref">Error targets</span></a> are those endpoints that receive forwarded errors from Arch when issues arise,
such as failing to properly call a function/API, detecting violations of guardrails, or encountering other processing errors.
These errors are communicated to the application via headers (X-Arch-[ERROR-TYPE]), allowing it to handle the errors gracefully
These errors are communicated to the application via headers <code class="docutils literal notranslate"><span class="pre">X-Arch-[ERROR-TYPE]</span></code>, allowing it to handle the errors gracefully
and take appropriate actions.</p>
<p><strong>Model Serving</strong>: Arch is a set of <strong>two</strong> self-contained processes that are designed to run alongside your application servers
(or on a separate host connected via a network). The <strong>model serving</strong> process helps Arch make intelligent decisions about the
<p><strong>Model Serving</strong>: Arch is a set of <cite>two</cite> self-contained processes that are designed to run alongside your application servers
(or on a separate host connected via a network). The <a class="reference internal" href="model_serving.html#model-serving"><span class="std std-ref">model serving</span></a> process helps Arch make intelligent decisions about the
incoming prompts. The model server is designed to call the (fast) purpose-built LLMs in Arch.</p>
</section>
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -161,7 +161,7 @@ threads perform filtering, and forwarding.</p>
thread. All the functionality around prompt handling from a downstream client is handled in a separate worker thread.
This allows the majority of Arch to be largely single threaded (embarrassingly parallel) with a small amount
of more complex code handling coordination between the worker threads.</p>
<p>Generally Arch is written to be 100% non-blocking.</p>
<p>Generally, Arch is written to be 100% non-blocking.</p>
<div class="admonition tip">
<p class="admonition-title">Tip</p>
<p>For most workloads we recommend configuring the number of worker threads to be equal to the number of

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -152,7 +152,7 @@
</nav>
<div id="content" role="main">
<section id="overview">
<h1>Overview<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#overview"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<span id="id1"></span><h1>Overview<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#overview"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
<p>Welcome to Arch, the intelligent prompt gateway designed to help developers build <strong>fast</strong>, <strong>secure</strong>, and <strong>personalized</strong> generative AI apps at ANY scale.
In this documentation, you will learn how to quickly set up Arch to trigger API calls via prompts, apply prompt guardrails without writing any application-level logic,
simplify the interaction with upstream LLMs, and improve observability all while simplifying your application development process.</p>
@ -165,7 +165,7 @@ simplify the interaction with upstream LLMs, and improve observability all while
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
Overview</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-apps" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M1.5 3.25c0-.966.784-1.75 1.75-1.75h2.5c.966 0 1.75.784 1.75 1.75v2.5A1.75 1.75 0 0 1 5.75 7.5h-2.5A1.75 1.75 0 0 1 1.5 5.75Zm7 0c0-.966.784-1.75 1.75-1.75h2.5c.966 0 1.75.784 1.75 1.75v2.5a1.75 1.75 0 0 1-1.75 1.75h-2.5A1.75 1.75 0 0 1 8.5 5.75Zm-7 7c0-.966.784-1.75 1.75-1.75h2.5c.966 0 1.75.784 1.75 1.75v2.5a1.75 1.75 0 0 1-1.75 1.75h-2.5a1.75 1.75 0 0 1-1.75-1.75Zm7 0c0-.966.784-1.75 1.75-1.75h2.5c.966 0 1.75.784 1.75 1.75v2.5a1.75 1.75 0 0 1-1.75 1.75h-2.5a1.75 1.75 0 0 1-1.75-1.75ZM3.25 3a.25.25 0 0 0-.25.25v2.5c0 .138.112.25.25.25h2.5A.25.25 0 0 0 6 5.75v-2.5A.25.25 0 0 0 5.75 3Zm7 0a.25.25 0 0 0-.25.25v2.5c0 .138.112.25.25.25h2.5a.25.25 0 0 0 .25-.25v-2.5a.25.25 0 0 0-.25-.25Zm-7 7a.25.25 0 0 0-.25.25v2.5c0 .138.112.25.25.25h2.5a.25.25 0 0 0 .25-.25v-2.5a.25.25 0 0 0-.25-.25Zm7 0a.25.25 0 0 0-.25.25v2.5c0 .138.112.25.25.25h2.5a.25.25 0 0 0 .25-.25v-2.5a.25.25 0 0 0-.25-.25Z"></path></svg> Overview</div>
<p class="sd-card-text">Overview of Arch and Doc navigation</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="overview.html"><span>overview.html</span></a></div>
@ -174,7 +174,7 @@ Overview</div>
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
Intro to Arch</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-book" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path></svg> Intro to Arch</div>
<p class="sd-card-text">Explore Arch's features and developer workflow</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="intro_to_arch.html"><span>intro_to_arch.html</span></a></div>
@ -183,7 +183,7 @@ Intro to Arch</div>
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
Quickstart</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-rocket" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M14.064 0h.186C15.216 0 16 .784 16 1.75v.186a8.752 8.752 0 0 1-2.564 6.186l-.458.459c-.314.314-.641.616-.979.904v3.207c0 .608-.315 1.172-.833 1.49l-2.774 1.707a.749.749 0 0 1-1.11-.418l-.954-3.102a1.214 1.214 0 0 1-.145-.125L3.754 9.816a1.218 1.218 0 0 1-.124-.145L.528 8.717a.749.749 0 0 1-.418-1.11l1.71-2.774A1.748 1.748 0 0 1 3.31 4h3.204c.288-.338.59-.665.904-.979l.459-.458A8.749 8.749 0 0 1 14.064 0ZM8.938 3.623h-.002l-.458.458c-.76.76-1.437 1.598-2.02 2.5l-1.5 2.317 2.143 2.143 2.317-1.5c.902-.583 1.74-1.26 2.499-2.02l.459-.458a7.25 7.25 0 0 0 2.123-5.127V1.75a.25.25 0 0 0-.25-.25h-.186a7.249 7.249 0 0 0-5.125 2.123ZM3.56 14.56c-.732.732-2.334 1.045-3.005 1.148a.234.234 0 0 1-.201-.064.234.234 0 0 1-.064-.201c.103-.671.416-2.273 1.15-3.003a1.502 1.502 0 1 1 2.12 2.12Zm6.94-3.935c-.088.06-.177.118-.266.175l-2.35 1.521.548 1.783 1.949-1.2a.25.25 0 0 0 .119-.213ZM3.678 8.116 5.2 5.766c.058-.09.117-.178.176-.266H3.309a.25.25 0 0 0-.213.119l-1.2 1.95ZM12 5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path></svg> Quickstart</div>
<p class="sd-card-text">Learn how to quickly set up and integrate</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="quickstart.html"><span>quickstart.html</span></a></div>
@ -200,28 +200,28 @@ Quickstart</div>
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
Tech Overview</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-package" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="m8.878.392 5.25 3.045c.54.314.872.89.872 1.514v6.098a1.75 1.75 0 0 1-.872 1.514l-5.25 3.045a1.75 1.75 0 0 1-1.756 0l-5.25-3.045A1.75 1.75 0 0 1 1 11.049V4.951c0-.624.332-1.201.872-1.514L7.122.392a1.75 1.75 0 0 1 1.756 0ZM7.875 1.69l-4.63 2.685L8 7.133l4.755-2.758-4.63-2.685a.248.248 0 0 0-.25 0ZM2.5 5.677v5.372c0 .09.047.171.125.216l4.625 2.683V8.432Zm6.25 8.271 4.625-2.683a.25.25 0 0 0 .125-.216V5.677L8.75 8.432Z"></path></svg> Tech Overview</div>
<p class="sd-card-text">Learn about the technology stack</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../Concepts/tech_overview/tech_overview.html"><span>../Concepts/tech_overview/tech_overview.html</span></a></div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../concepts/tech_overview/tech_overview.html"><span>../concepts/tech_overview/tech_overview.html</span></a></div>
</div>
<div class="sd-col sd-d-flex-row docutils">
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
LLM Provider</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-webhook" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M5.5 4.25a2.25 2.25 0 0 1 4.5 0 .75.75 0 0 0 1.5 0 3.75 3.75 0 1 0-6.14 2.889l-2.272 4.258a.75.75 0 0 0 1.324.706L7 7.25a.75.75 0 0 0-.309-1.015A2.25 2.25 0 0 1 5.5 4.25Z"></path><path d="M7.364 3.607a.75.75 0 0 1 1.03.257l2.608 4.349a3.75 3.75 0 1 1-.628 6.785.75.75 0 0 1 .752-1.299 2.25 2.25 0 1 0-.033-3.88.75.75 0 0 1-1.03-.256L7.107 4.636a.75.75 0 0 1 .257-1.03Z"></path><path d="M2.9 8.776A.75.75 0 0 1 2.625 9.8 2.25 2.25 0 1 0 6 11.75a.75.75 0 0 1 .75-.751h5.5a.75.75 0 0 1 0 1.5H7.425a3.751 3.751 0 1 1-5.55-3.998.75.75 0 0 1 1.024.274Z"></path></svg> LLM Provider</div>
<p class="sd-card-text">Explore Arch's LLM integration options</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../Concepts/llm_provider.html"><span>../Concepts/llm_provider.html</span></a></div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../concepts/llm_provider.html"><span>../concepts/llm_provider.html</span></a></div>
</div>
<div class="sd-col sd-d-flex-row docutils">
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
Targets</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-workflow" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M0 1.75C0 .784.784 0 1.75 0h3.5C6.216 0 7 .784 7 1.75v3.5A1.75 1.75 0 0 1 5.25 7H4v4a1 1 0 0 0 1 1h4v-1.25C9 9.784 9.784 9 10.75 9h3.5c.966 0 1.75.784 1.75 1.75v3.5A1.75 1.75 0 0 1 14.25 16h-3.5A1.75 1.75 0 0 1 9 14.25v-.75H5A2.5 2.5 0 0 1 2.5 11V7h-.75A1.75 1.75 0 0 1 0 5.25Zm1.75-.25a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Zm9 9a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Z"></path></svg> Prompt Target</div>
<p class="sd-card-text">Understand how Arch handles prompts</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../Concepts/prompt_target.html"><span>../Concepts/prompt_target.html</span></a></div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../concepts/prompt_target.html"><span>../concepts/prompt_target.html</span></a></div>
</div>
</div>
</div>
@ -235,16 +235,16 @@ Targets</div>
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
Prompt Guard</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-shield-check" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="m8.533.133 5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667l5.25-1.68a1.748 1.748 0 0 1 1.066 0Zm-.61 1.429.001.001-5.25 1.68a.251.251 0 0 0-.174.237V7c0 1.36.275 2.666 1.057 3.859.784 1.194 2.121 2.342 4.366 3.298a.196.196 0 0 0 .154 0c2.245-.957 3.582-2.103 4.366-3.297C13.225 9.666 13.5 8.358 13.5 7V3.48a.25.25 0 0 0-.174-.238l-5.25-1.68a.25.25 0 0 0-.153 0ZM11.28 6.28l-3.5 3.5a.75.75 0 0 1-1.06 0l-1.5-1.5a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l.97.97 2.97-2.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path></svg> Prompt Guard</div>
<p class="sd-card-text">Instructions on securing and validating prompts</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/tech_overview/tech_overview.html"><span>../guides/tech_overview/tech_overview.html</span></a></div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/prompt_guard.html"><span>../guides/prompt_guard.html</span></a></div>
</div>
<div class="sd-col sd-d-flex-row docutils">
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
Function Calling</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-code-square" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25Zm7.47 3.97a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L10.69 8 9.22 6.53a.75.75 0 0 1 0-1.06ZM6.78 6.53 5.31 8l1.47 1.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-2-2a.75.75 0 0 1 0-1.06l2-2a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path></svg> Function Calling</div>
<p class="sd-card-text">A guide to effective function calling</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/function_calling.html"><span>../guides/function_calling.html</span></a></div>
@ -253,10 +253,10 @@ Function Calling</div>
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
Observability</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-issue-opened" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path></svg> Observability</div>
<p class="sd-card-text">Learn to monitor and troubleshoot Arch</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/prompt_target.html"><span>../guides/prompt_target.html</span></a></div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/observability/observability.html"><span>../guides/observability/observability.html</span></a></div>
</div>
</div>
</div>
@ -270,7 +270,7 @@ Observability</div>
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
Agentic Workflow</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-dependabot" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M5.75 7.5a.75.75 0 0 1 .75.75v1.5a.75.75 0 0 1-1.5 0v-1.5a.75.75 0 0 1 .75-.75Zm5.25.75a.75.75 0 0 0-1.5 0v1.5a.75.75 0 0 0 1.5 0v-1.5Z"></path><path d="M6.25 0h2A.75.75 0 0 1 9 .75V3.5h3.25a2.25 2.25 0 0 1 2.25 2.25V8h.75a.75.75 0 0 1 0 1.5h-.75v2.75a2.25 2.25 0 0 1-2.25 2.25h-8.5a2.25 2.25 0 0 1-2.25-2.25V9.5H.75a.75.75 0 0 1 0-1.5h.75V5.75A2.25 2.25 0 0 1 3.75 3.5H7.5v-2H6.25a.75.75 0 0 1 0-1.5ZM3 5.75v6.5c0 .414.336.75.75.75h8.5a.75.75 0 0 0 .75-.75v-6.5a.75.75 0 0 0-.75-.75h-8.5a.75.75 0 0 0-.75.75Z"></path></svg> Agentic Workflow</div>
<p class="sd-card-text">Discover how to create and manage custom agents within Arch</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../build_with_arch/agent.html"><span>../build_with_arch/agent.html</span></a></div>
@ -279,7 +279,7 @@ Agentic Workflow</div>
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
RAG Application</div>
<svg aria-hidden="true" class="sd-octicon sd-octicon-stack" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M7.122.392a1.75 1.75 0 0 1 1.756 0l5.003 2.902c.83.481.83 1.68 0 2.162L8.878 8.358a1.75 1.75 0 0 1-1.756 0L2.119 5.456a1.251 1.251 0 0 1 0-2.162ZM8.125 1.69a.248.248 0 0 0-.25 0l-4.63 2.685 4.63 2.685a.248.248 0 0 0 .25 0l4.63-2.685ZM1.601 7.789a.75.75 0 0 1 1.025-.273l5.249 3.044a.248.248 0 0 0 .25 0l5.249-3.044a.75.75 0 0 1 .752 1.298l-5.248 3.044a1.75 1.75 0 0 1-1.756 0L1.874 8.814A.75.75 0 0 1 1.6 7.789Zm0 3.5a.75.75 0 0 1 1.025-.273l5.249 3.044a.248.248 0 0 0 .25 0l5.249-3.044a.75.75 0 0 1 .752 1.298l-5.248 3.044a1.75 1.75 0 0 1-1.756 0l-5.248-3.044a.75.75 0 0 1-.273-1.025Z"></path></svg> RAG Application</div>
<p class="sd-card-text">Integrate RAG for knowledge-driven responses</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="../build_with_arch/rag.html"><span>../build_with_arch/rag.html</span></a></div>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -196,53 +196,50 @@ Below is an example configuration to get you started, including:</p>
<li><p><code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code>: Defines endpoints that handle specific types of prompts.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">error_target</span></code>: Specifies where to route errors for handling.</p></li>
</ul>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
</span><span id="line-2"><span class="nt">listen</span><span class="p">:</span>
</span><span id="line-3"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
</span><span id="line-4"><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
</span><span id="line-5">
</span><span id="line-6"><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-7"><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
</span><span id="line-2">
</span><span id="line-3"><span class="nt">listen</span><span class="p">:</span>
</span><span id="line-4"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
</span><span id="line-5"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
</span><span id="line-6"><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/plain Content-type in the http request</span>
</span><span id="line-7"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
</span><span id="line-8">
</span><span id="line-9"><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
</span><span id="line-11"><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
</span><span id="line-12"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-13"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-14"><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-15">
</span><span id="line-16"><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-17"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
</span><span id="line-18"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">&gt;</span>
</span><span id="line-19"><span class="w"> </span><span class="no">This prompt target handles user requests to reboot devices.</span>
</span><span id="line-20"><span class="w"> </span><span class="no">It ensures that when users request to reboot specific devices or device groups, the system processes the reboot commands accurately.</span>
</span><span id="line-21">
</span><span id="line-22"><span class="w"> </span><span class="no">**Examples of user prompts:**</span>
</span><span id="line-23">
</span><span id="line-24"><span class="w"> </span><span class="no">- "Please reboot device 12345."</span>
</span><span id="line-25"><span class="w"> </span><span class="no">- "Restart all devices in tenant group tenant-XYZ</span>
</span><span id="line-26"><span class="w"> </span><span class="no">- "I need to reboot devices A, B, and C."</span>
</span><span id="line-27">
</span><span id="line-28"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
</span><span id="line-29"><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</span><span id="line-30"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_ids"</span>
</span><span id="line-31"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
</span><span id="line-32"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"A</span><span class="nv"> </span><span class="s">list</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">identifiers</span><span class="nv"> </span><span class="s">(IDs)</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</span><span id="line-33"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</span><span id="line-34"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_group"</span>
</span><span id="line-35"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">string</span><span class="w"> </span><span class="c1"># Options: string | integer | float | list | dictionary | set</span>
</span><span id="line-36"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"The</span><span class="nv"> </span><span class="s">name</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">group</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</span><span id="line-37"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</span><span id="line-38">
</span><span id="line-39"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-40"><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-41"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-42"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-43"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-44"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
</span><span id="line-45"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-46"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span><span id="line-47"><span class="w"> </span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
</span><span id="line-9"><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
</span><span id="line-10"><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-11"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
</span><span id="line-12"><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-14"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-15"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-16"><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-17">
</span><span id="line-18"><span class="c1"># default system prompt used by all prompt targets</span>
</span><span id="line-19"><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-20">
</span><span id="line-21"><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-22"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
</span><span id="line-23"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot specific devices or device groups</span>
</span><span id="line-24">
</span><span id="line-25"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
</span><span id="line-26"><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</span><span id="line-27"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_ids</span>
</span><span id="line-28"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span>
</span><span id="line-29"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">A list of device identifiers (IDs) to reboot.</span>
</span><span id="line-30"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</span><span id="line-31"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_group</span>
</span><span id="line-32"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
</span><span id="line-33"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">The name of the device group to reboot</span>
</span><span id="line-34"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</span><span id="line-35">
</span><span id="line-36"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-37"><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-38"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-39"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-40"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-41"><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-42"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
</span><span id="line-43"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-44"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span></code></pre></div>
</div>
</section>
@ -256,7 +253,7 @@ Below is an example configuration to get you started, including:</p>
<h2>Next Steps<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#next-steps" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#next-steps'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
<p>Congratulations! You've successfully set up Arch and made your first prompt-based request. To further enhance your GenAI applications, explore the following resources:</p>
<ul class="simple">
<li><p>Full Documentation: Comprehensive guides and references.</p></li>
<li><p><a class="reference internal" href="overview.html#overview"><span class="std std-ref">Full Documentation</span></a>: Comprehensive guides and references.</p></li>
<li><p><a class="reference external" href="https://github.com/katanemo/arch" rel="nofollow noopener">GitHub Repository<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>: Access the source code, contribute, and track updates.</p></li>
<li><p><a class="reference external" href="https://github.com/katanemo/arch#contact" rel="nofollow noopener">Support<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>: Get help and connect with the Arch community.</p></li>
</ul>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -248,7 +248,7 @@ Achieving performance on par with GPT-4, these models set a new benchmark in the
<p>Here's a step-by-step guide to configuring function calling within your Arch setup:</p>
<section id="step-1-define-the-function">
<h3>Step 1: Define the Function<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-the-function" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-the-function'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>Create or identify the backend function you want Arch to call. This could be an API endpoint, a script, or any other executable backend logic.</p>
<p>First, create or identify the backend function you want Arch to call. This could be an API endpoint, a script, or any other executable backend logic.</p>
<div class="literal-block-wrapper docutils container" id="id3">
<div class="code-block-caption"><span class="caption-text">Example Function</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">import</span> <span class="nn">requests</span>
@ -277,9 +277,10 @@ Achieving performance on par with GPT-4, these models set a new benchmark in the
</section>
<section id="step-2-configure-prompt-targets">
<h3>Step 2: Configure Prompt Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-configure-prompt-targets" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-configure-prompt-targets'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>Map the function to a prompt target, defining the intent and parameters that Arch will extract from the users prompt.</p>
<p>Next, map the function to a prompt target, defining the intent and parameters that Arch will extract from the user's prompt.
Specify the parameters your function needs and how Arch should interpret these.</p>
<div class="literal-block-wrapper docutils container" id="id4">
<div class="code-block-caption"><span class="caption-text">Example Config</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="code-block-caption"><span class="caption-text">Prompt Target Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">get_weather</span>
</span><span id="line-3"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Get the current weather for a location</span>
@ -299,9 +300,10 @@ Achieving performance on par with GPT-4, these models set a new benchmark in the
</div>
</div>
</section>
<section id="step-3-validate-parameters">
<h3>Step 3: Validate Parameters<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-3-validate-parameters" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-3-validate-parameters'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>Arch will validate parameters and ensure that the required parameters (e.g., location) are present in the prompt, and add validation rules if necessary.
<section id="step-3-arch-takes-over">
<h3>Step 3: Arch Takes Over<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-3-arch-takes-over" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-3-arch-takes-over'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>Once you have defined the functions and configured the prompt targets, Arch takes care of the remaining work.
It will automatically validate parameters and ensure that the required parameters (e.g., location) are present in the prompt, and add validation rules if necessary.
Here is an example validation schema using the <a class="reference external" href="https://json-schema.org/docs" rel="nofollow noopener">jsonschema<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> library</p>
<div class="literal-block-wrapper docutils container" id="id5">
<div class="code-block-caption"><span class="caption-text">Example Validation Schema</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id5"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
@ -354,11 +356,8 @@ Here is ane example validation schema using the <a class="reference external" hr
</span></code></pre></div>
</div>
</div>
</section>
<section id="step-4-execute-and-return-the-response">
<h3>Step 4: Execute and Return the Response<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-4-execute-and-return-the-response" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-4-execute-and-return-the-response'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>Once the function is called, format the response and send it back to Arch-Function.
Next, Arch-Function provides users with coherent and user-friendly responses.</p>
<p>Once the functions are called, Arch formats the response and delivers it back to users.
By completing these setup steps, you enable Arch to manage the process from validation to response, ensuring users receive consistent, reliable results.</p>
</section>
</section>
<section id="example-use-cases">
@ -415,8 +414,7 @@ Next, Arch-Function provides users with coherent and user-friendly responses.</p
<li><a :data-current="activeSection === '#implementing-function-calling'" class="reference internal" href="#implementing-function-calling">Implementing Function Calling</a><ul>
<li><a :data-current="activeSection === '#step-1-define-the-function'" class="reference internal" href="#step-1-define-the-function">Step 1: Define the Function</a></li>
<li><a :data-current="activeSection === '#step-2-configure-prompt-targets'" class="reference internal" href="#step-2-configure-prompt-targets">Step 2: Configure Prompt Targets</a></li>
<li><a :data-current="activeSection === '#step-3-validate-parameters'" class="reference internal" href="#step-3-validate-parameters">Step 3: Validate Parameters</a></li>
<li><a :data-current="activeSection === '#step-4-execute-and-return-the-response'" class="reference internal" href="#step-4-execute-and-return-the-response">Step 4: Execute and Return the Response</a></li>
<li><a :data-current="activeSection === '#step-3-arch-takes-over'" class="reference internal" href="#step-3-arch-takes-over">Step 3: Arch Takes Over</a></li>
</ul>
</li>
<li><a :data-current="activeSection === '#example-use-cases'" class="reference internal" href="#example-use-cases">Example Use Cases</a></li>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>

View file

@ -101,9 +101,10 @@
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +129,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -212,6 +212,19 @@ Arch-Guard is designed to address this challenge.</p>
It excels at detecting explicitly malicious prompts and assessing toxic content, providing an essential layer of security for LLM applications.</p>
<p>By embedding Arch-Guard within the Arch architecture, we empower developers to build robust, LLM-powered applications while prioritizing security and safety. With Arch-Guard, you can navigate the complexities of prompt management with confidence, knowing you have a reliable defense against malicious input.</p>
</section>
<section id="example-configuration">
<h3>Example Configuration<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#example-configuration" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#example-configuration'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
<p>Here is an example of using Arch-Guard in Arch:</p>
<div class="literal-block-wrapper docutils container" id="id3">
<div class="code-block-caption"><span class="caption-text">Arch-Guard Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos">1</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
</span><span id="line-2"><span class="linenos">2</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
</span><span id="line-3"><span class="linenos">3</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
</span><span id="line-4"><span class="linenos">4</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
</span></code></pre></div>
</div>
</div>
</section>
</section>
<section id="how-arch-guard-works">
<h2>How Arch-Guard Works<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#how-arch-guard-works" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#how-arch-guard-works'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
@ -281,6 +294,7 @@ By implementing Prompt Guard, developers can provide a robust layer of input val
<li><a :data-current="activeSection === '#why-prompt-guard'" class="reference internal" href="#why-prompt-guard">Why Prompt Guard</a></li>
<li><a :data-current="activeSection === '#arch-guard'" class="reference internal" href="#arch-guard">Arch-Guard</a><ul>
<li><a :data-current="activeSection === '#what-is-arch-guard'" class="reference internal" href="#what-is-arch-guard">What Is Arch-Guard</a></li>
<li><a :data-current="activeSection === '#example-configuration'" class="reference internal" href="#example-configuration">Example Configuration</a></li>
</ul>
</li>
<li><a :data-current="activeSection === '#how-arch-guard-works'" class="reference internal" href="#how-arch-guard-works">How Arch-Guard Works</a></li>

View file

@ -100,9 +100,10 @@
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_provider.html">LLM Provider</a></li>
@ -127,7 +128,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="resources/error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -175,9 +175,10 @@ Concepts</label><div class="sd-tab-content docutils">
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_provider.html">LLM Provider</a></li>
@ -220,7 +221,6 @@ Resources</label><div class="sd-tab-content docutils">
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="resources/error_target.html">Error Targets</a></li>
</ul>
</div>
</div>

Binary file not shown.

View file

@ -18,7 +18,6 @@
<link href="./docs/resources/configuration_reference.html" rel="canonical"/>
<link href="../_static/favicon.ico" rel="icon"/>
<link href="../search.html" rel="search" title="Search"/>
<link href="error_target.html" rel="next" title="Error Targets"/>
<link href="../build_with_arch/rag.html" rel="prev" title="RAG Application"/>
<script>
<!-- Prevent Flash of wrong theme -->
@ -101,9 +100,10 @@
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
@ -128,7 +128,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul class="current">
<li class="toctree-l1 current"><a class="current reference internal" href="#">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="error_target.html">Error Targets</a></li>
</ul>
</nav>
</div>
@ -160,7 +159,7 @@ options, etc). Our belief that the simple things, should be simple. So we offert
that they can spend more of their time in building features unique to their AI experience.</p>
<div class="literal-block-wrapper docutils container" id="id1">
<div class="code-block-caption"><span class="caption-text"><a class="reference download internal" download="" href="../_downloads/ca9d3b7116524473d8adbde7cf15d167/arch_config_full_reference.yaml"><code class="xref download docutils literal notranslate"><span class="pre">Arch</span> <span class="pre">Configuration</span> <span class="pre">-</span> <span class="pre">Full</span> <span class="pre">Reference</span></code></a></span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() =&gt; $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
</span><span id="line-2"><span class="linenos"> 2</span>
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
@ -170,9 +169,9 @@ that they can spend more of their time in building features unique to their AI e
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="nt">common_tls_context</span><span class="p">:</span><span class="w"> </span><span class="c1"># If you configure port 443, you'll need to update the listener with your TLS certificates</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="w"> </span><span class="nt">tls_certificates</span><span class="p">:</span>
</span><span id="line-10"><span class="linenos"> 10</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">certificate_chain</span><span class="p">:</span>
</span><span id="line-11"><span class="linenos"> 11</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="s">"/etc/certs/cert.pem"</span>
</span><span id="line-11"><span class="linenos"> 11</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/etc/certs/cert.pem</span>
</span><span id="line-12"><span class="linenos"> 12</span><span class="w"> </span><span class="nt">private_key</span><span class="p">:</span>
</span><span id="line-13"><span class="linenos"> 13</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="s">"/etc/certs/key.pem"</span>
</span><span id="line-13"><span class="linenos"> 13</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/etc/certs/key.pem</span>
</span><span id="line-14"><span class="linenos"> 14</span>
</span><span id="line-15"><span class="linenos"> 15</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
</span><span id="line-16"><span class="linenos"> 16</span><span class="nt">endpoints</span><span class="p">:</span>
@ -180,42 +179,42 @@ that they can spend more of their time in building features unique to their AI e
</span><span id="line-18"><span class="linenos"> 18</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
</span><span id="line-19"><span class="linenos"> 19</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
</span><span id="line-20"><span class="linenos"> 20</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
</span><span id="line-21"><span class="linenos"> 21</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
</span><span id="line-21"><span class="linenos"> 21</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
</span><span id="line-22"><span class="linenos"> 22</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
</span><span id="line-23"><span class="linenos"> 23</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span><span id="line-24"><span class="linenos"> 24</span>
</span><span id="line-25"><span class="linenos"> 25</span><span class="w"> </span><span class="nt">mistral_local</span><span class="p">:</span>
</span><span id="line-26"><span class="linenos"> 26</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:8001"</span>
</span><span id="line-26"><span class="linenos"> 26</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:8001</span>
</span><span id="line-27"><span class="linenos"> 27</span>
</span><span id="line-28"><span class="linenos"> 28</span><span class="w"> </span><span class="nt">error_target</span><span class="p">:</span>
</span><span id="line-29"><span class="linenos"> 29</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"error_target_1"</span>
</span><span id="line-29"><span class="linenos"> 29</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
</span><span id="line-30"><span class="linenos"> 30</span>
</span><span id="line-31"><span class="linenos"> 31</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
</span><span id="line-32"><span class="linenos"> 32</span><span class="nt">llm_providers</span><span class="p">:</span>
</span><span id="line-33"><span class="linenos"> 33</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
</span><span id="line-34"><span class="linenos"> 34</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
</span><span id="line-35"><span class="linenos"> 35</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-33"><span class="linenos"> 33</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
</span><span id="line-34"><span class="linenos"> 34</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
</span><span id="line-35"><span class="linenos"> 35</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
</span><span id="line-36"><span class="linenos"> 36</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-37"><span class="linenos"> 37</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-38"><span class="linenos"> 38</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-39"><span class="linenos"> 39</span><span class="w"> </span><span class="nt">rate_limits</span><span class="p">:</span>
</span><span id="line-40"><span class="linenos"> 40</span><span class="w"> </span><span class="nt">selector</span><span class="p">:</span><span class="w"> </span><span class="c1">#optional headers, to add rate limiting based on http headers like JWT tokens or API keys</span>
</span><span id="line-41"><span class="linenos"> 41</span><span class="w"> </span><span class="nt">http_header</span><span class="p">:</span>
</span><span id="line-42"><span class="linenos"> 42</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Authorization"</span>
</span><span id="line-42"><span class="linenos"> 42</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Authorization</span>
</span><span id="line-43"><span class="linenos"> 43</span><span class="w"> </span><span class="nt">value</span><span class="p">:</span><span class="w"> </span><span class="s">""</span><span class="w"> </span><span class="c1"># Empty value means each separate value has a separate limit</span>
</span><span id="line-44"><span class="linenos"> 44</span><span class="w"> </span><span class="nt">limit</span><span class="p">:</span>
</span><span id="line-45"><span class="linenos"> 45</span><span class="w"> </span><span class="nt">tokens</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100000</span><span class="w"> </span><span class="c1"># Tokens per unit</span>
</span><span id="line-46"><span class="linenos"> 46</span><span class="w"> </span><span class="nt">unit</span><span class="p">:</span><span class="w"> </span><span class="s">"minute"</span>
</span><span id="line-46"><span class="linenos"> 46</span><span class="w"> </span><span class="nt">unit</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">minute</span>
</span><span id="line-47"><span class="linenos"> 47</span>
</span><span id="line-48"><span class="linenos"> 48</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral8x7b"</span>
</span><span id="line-49"><span class="linenos"> 49</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral"</span>
</span><span id="line-50"><span class="linenos"> 50</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_API_KEY</span>
</span><span id="line-51"><span class="linenos"> 51</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral-8x7b"</span>
</span><span id="line-48"><span class="linenos"> 48</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Mistral8x7b</span>
</span><span id="line-49"><span class="linenos"> 49</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral</span>
</span><span id="line-50"><span class="linenos"> 50</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">MISTRAL_API_KEY</span>
</span><span id="line-51"><span class="linenos"> 51</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral-8x7b</span>
</span><span id="line-52"><span class="linenos"> 52</span>
</span><span id="line-53"><span class="linenos"> 53</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"MistralLocal7b"</span>
</span><span id="line-54"><span class="linenos"> 54</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"local"</span>
</span><span id="line-55"><span class="linenos"> 55</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral-7b-instruct"</span>
</span><span id="line-56"><span class="linenos"> 56</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral_local"</span>
</span><span id="line-53"><span class="linenos"> 53</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">MistralLocal7b</span>
</span><span id="line-54"><span class="linenos"> 54</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">local</span>
</span><span id="line-55"><span class="linenos"> 55</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral-7b-instruct</span>
</span><span id="line-56"><span class="linenos"> 56</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral_local</span>
</span><span id="line-57"><span class="linenos"> 57</span>
</span><span id="line-58"><span class="linenos"> 58</span><span class="c1"># provides a way to override default settings for the arch system</span>
</span><span id="line-59"><span class="linenos"> 59</span><span class="nt">overrides</span><span class="p">:</span>
@ -224,51 +223,48 @@ that they can spend more of their time in building features unique to their AI e
</span><span id="line-62"><span class="linenos"> 62</span><span class="w"> </span><span class="nt">prompt_target_intent_matching_threshold</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.60</span>
</span><span id="line-63"><span class="linenos"> 63</span>
</span><span id="line-64"><span class="linenos"> 64</span><span class="c1"># default system prompt used by all prompt targets</span>
</span><span id="line-65"><span class="linenos"> 65</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-66"><span class="linenos"> 66</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-67"><span class="linenos"> 67</span>
</span><span id="line-68"><span class="linenos"> 68</span><span class="nt">prompt_guards</span><span class="p">:</span>
</span><span id="line-69"><span class="linenos"> 69</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
</span><span id="line-70"><span class="linenos"> 70</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
</span><span id="line-71"><span class="linenos"> 71</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
</span><span id="line-72"><span class="linenos"> 72</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="s">"Looks</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">you're</span><span class="nv"> </span><span class="s">curious</span><span class="nv"> </span><span class="s">about</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">abilities,</span><span class="nv"> </span><span class="s">but</span><span class="nv"> </span><span class="s">I</span><span class="nv"> </span><span class="s">can</span><span class="nv"> </span><span class="s">only</span><span class="nv"> </span><span class="s">provide</span><span class="nv"> </span><span class="s">assistance</span><span class="nv"> </span><span class="s">within</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">programmed</span><span class="nv"> </span><span class="s">parameters."</span>
</span><span id="line-73"><span class="linenos"> 73</span>
</span><span id="line-74"><span class="linenos"> 74</span><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-75"><span class="linenos"> 75</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"reboot_network_device"</span>
</span><span id="line-76"><span class="linenos"> 76</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Helps</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">operators</span><span class="nv"> </span><span class="s">perform</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">operations</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">rebooting</span><span class="nv"> </span><span class="s">a</span><span class="nv"> </span><span class="s">device."</span>
</span><span id="line-65"><span class="linenos"> 65</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
</span><span id="line-66"><span class="linenos"> 66</span>
</span><span id="line-67"><span class="linenos"> 67</span><span class="nt">prompt_guards</span><span class="p">:</span>
</span><span id="line-68"><span class="linenos"> 68</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
</span><span id="line-69"><span class="linenos"> 69</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
</span><span id="line-70"><span class="linenos"> 70</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
</span><span id="line-71"><span class="linenos"> 71</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
</span><span id="line-72"><span class="linenos"> 72</span>
</span><span id="line-73"><span class="linenos"> 73</span><span class="nt">prompt_targets</span><span class="p">:</span>
</span><span id="line-74"><span class="linenos"> 74</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
</span><span id="line-75"><span class="linenos"> 75</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-76"><span class="linenos"> 76</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">handle all scenarios that are question and answer in nature. Like summarization, information extraction, etc.</span>
</span><span id="line-77"><span class="linenos"> 77</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-78"><span class="linenos"> 78</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</span><span id="line-79"><span class="linenos"> 79</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/action"</span>
</span><span id="line-80"><span class="linenos"> 80</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</span><span id="line-81"><span class="linenos"> 81</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_id"</span>
</span><span id="line-82"><span class="linenos"> 82</span><span class="w"> </span><span class="c1"># additional type options include: int | float | bool | string | list | dict</span>
</span><span id="line-83"><span class="linenos"> 83</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
</span><span id="line-84"><span class="linenos"> 84</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Identifier</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
</span><span id="line-85"><span class="linenos"> 85</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-86"><span class="linenos"> 86</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"confirmation"</span>
</span><span id="line-87"><span class="linenos"> 87</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
</span><span id="line-88"><span class="linenos"> 88</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Confirmation</span><span class="nv"> </span><span class="s">flag</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">proceed</span><span class="nv"> </span><span class="s">with</span><span class="nv"> </span><span class="s">reboot."</span>
</span><span id="line-89"><span class="linenos"> 89</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="s">"no"</span>
</span><span id="line-90"><span class="linenos"> 90</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">yes</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">no</span><span class="p p-Indicator">]</span>
</span><span id="line-91"><span class="linenos"> 91</span>
</span><span id="line-92"><span class="linenos"> 92</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"information_extraction"</span>
</span><span id="line-93"><span class="linenos"> 93</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-94"><span class="linenos"> 94</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"This</span><span class="nv"> </span><span class="s">prompt</span><span class="nv"> </span><span class="s">handles</span><span class="nv"> </span><span class="s">all</span><span class="nv"> </span><span class="s">scenarios</span><span class="nv"> </span><span class="s">that</span><span class="nv"> </span><span class="s">are</span><span class="nv"> </span><span class="s">question</span><span class="nv"> </span><span class="s">and</span><span class="nv"> </span><span class="s">answer</span><span class="nv"> </span><span class="s">in</span><span class="nv"> </span><span class="s">nature.</span><span class="nv"> </span><span class="s">Like</span><span class="nv"> </span><span class="s">summarization,</span><span class="nv"> </span><span class="s">information</span><span class="nv"> </span><span class="s">extraction,</span><span class="nv"> </span><span class="s">etc."</span>
</span><span id="line-95"><span class="linenos"> 95</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-96"><span class="linenos"> 96</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</span><span id="line-97"><span class="linenos"> 97</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/summary"</span>
</span><span id="line-98"><span class="linenos"> 98</span><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
</span><span id="line-99"><span class="linenos"> 99</span><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-100"><span class="linenos">100</span><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
</span><span id="line-101"><span class="linenos">101</span><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
</span><span id="line-102"><span class="linenos">102</span><span class="w"> </span><span class="no">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
</span><span id="line-103"><span class="linenos">103</span>
</span><span id="line-104"><span class="linenos">104</span><span class="nt">error_target</span><span class="p">:</span>
</span><span id="line-105"><span class="linenos">105</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-106"><span class="linenos">106</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
</span><span id="line-107"><span class="linenos">107</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
</span><span id="line-108"><span class="linenos">108</span>
</span><span id="line-109"><span class="linenos">109</span><span class="nt">tracing</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100</span><span class="w"> </span><span class="c1">#sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.</span>
</span><span id="line-79"><span class="linenos"> 79</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
</span><span id="line-80"><span class="linenos"> 80</span><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
</span><span id="line-81"><span class="linenos"> 81</span><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-82"><span class="linenos"> 82</span><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
</span><span id="line-83"><span class="linenos"> 83</span><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
</span><span id="line-84"><span class="linenos"> 84</span>
</span><span id="line-85"><span class="linenos"> 85</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
</span><span id="line-86"><span class="linenos"> 86</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot a specific network device</span>
</span><span id="line-87"><span class="linenos"> 87</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-88"><span class="linenos"> 88</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
</span><span id="line-89"><span class="linenos"> 89</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
</span><span id="line-90"><span class="linenos"> 90</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
</span><span id="line-91"><span class="linenos"> 91</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_id</span>
</span><span id="line-92"><span class="linenos"> 92</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
</span><span id="line-93"><span class="linenos"> 93</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Identifier of the network device to reboot.</span>
</span><span id="line-94"><span class="linenos"> 94</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-95"><span class="linenos"> 95</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">confirmation</span>
</span><span id="line-96"><span class="linenos"> 96</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">bool</span>
</span><span id="line-97"><span class="linenos"> 97</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Confirmation flag to proceed with reboot.</span>
</span><span id="line-98"><span class="linenos"> 98</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</span><span id="line-99"><span class="linenos"> 99</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">true</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">false</span><span class="p p-Indicator">]</span>
</span><span id="line-100"><span class="linenos">100</span>
</span><span id="line-101"><span class="linenos">101</span><span class="nt">error_target</span><span class="p">:</span>
</span><span id="line-102"><span class="linenos">102</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
</span><span id="line-103"><span class="linenos">103</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
</span><span id="line-104"><span class="linenos">104</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
</span><span id="line-105"><span class="linenos">105</span>
</span><span id="line-106"><span class="linenos">106</span><span class="nt">tracing</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100</span><span class="w"> </span><span class="c1">#sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.</span>
</span></code></pre></div>
</div>
</div>
@ -282,14 +278,6 @@ that they can spend more of their time in building features unique to their AI e
RAG Application
</a>
</div>
<div class="ml-auto">
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="error_target.html">
Error Targets
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</a>
</div>
</div></div>
</main>
</div>

View file

@ -135,9 +135,10 @@
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/terminology.html">Terminology</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/threading_model.html">Threading Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/listener.html">Listener</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompts</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/model_serving.html">Model Serving</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompt</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/error_target.html">Error Target</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_provider.html">LLM Provider</a></li>
@ -162,7 +163,6 @@
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="resources/configuration_reference.html">Configuration Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="resources/error_target.html">Error Targets</a></li>
</ul>
</nav>

File diff suppressed because one or more lines are too long

View file

@ -1,2 +1,2 @@
<?xml version='1.0' encoding='utf-8'?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>./docsbuild_with_arch/agent.html</loc></url><url><loc>./docsbuild_with_arch/rag.html</loc></url><url><loc>./docsconcepts/llm_provider.html</loc></url><url><loc>./docsconcepts/prompt_target.html</loc></url><url><loc>./docsconcepts/tech_overview/listener.html</loc></url><url><loc>./docsconcepts/tech_overview/model_serving.html</loc></url><url><loc>./docsconcepts/tech_overview/prompt.html</loc></url><url><loc>./docsconcepts/tech_overview/request_lifecycle.html</loc></url><url><loc>./docsconcepts/tech_overview/tech_overview.html</loc></url><url><loc>./docsconcepts/tech_overview/terminology.html</loc></url><url><loc>./docsconcepts/tech_overview/threading_model.html</loc></url><url><loc>./docsget_started/intro_to_arch.html</loc></url><url><loc>./docsget_started/overview.html</loc></url><url><loc>./docsget_started/quickstart.html</loc></url><url><loc>./docsguides/function_calling.html</loc></url><url><loc>./docsguides/observability/access_logging.html</loc></url><url><loc>./docsguides/observability/monitoring.html</loc></url><url><loc>./docsguides/observability/observability.html</loc></url><url><loc>./docsguides/observability/tracing.html</loc></url><url><loc>./docsguides/prompt_guard.html</loc></url><url><loc>./docsindex.html</loc></url><url><loc>./docsresources/configuration_reference.html</loc></url><url><loc>./docsresources/error_target.html</loc></url><url><loc>./docssearch.html</loc></url></urlset>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>./docsbuild_with_arch/agent.html</loc></url><url><loc>./docsbuild_with_arch/rag.html</loc></url><url><loc>./docsconcepts/llm_provider.html</loc></url><url><loc>./docsconcepts/prompt_target.html</loc></url><url><loc>./docsconcepts/tech_overview/error_target.html</loc></url><url><loc>./docsconcepts/tech_overview/listener.html</loc></url><url><loc>./docsconcepts/tech_overview/model_serving.html</loc></url><url><loc>./docsconcepts/tech_overview/prompt.html</loc></url><url><loc>./docsconcepts/tech_overview/request_lifecycle.html</loc></url><url><loc>./docsconcepts/tech_overview/tech_overview.html</loc></url><url><loc>./docsconcepts/tech_overview/terminology.html</loc></url><url><loc>./docsconcepts/tech_overview/threading_model.html</loc></url><url><loc>./docsget_started/intro_to_arch.html</loc></url><url><loc>./docsget_started/overview.html</loc></url><url><loc>./docsget_started/quickstart.html</loc></url><url><loc>./docsguides/function_calling.html</loc></url><url><loc>./docsguides/observability/access_logging.html</loc></url><url><loc>./docsguides/observability/monitoring.html</loc></url><url><loc>./docsguides/observability/observability.html</loc></url><url><loc>./docsguides/observability/tracing.html</loc></url><url><loc>./docsguides/prompt_guard.html</loc></url><url><loc>./docsindex.html</loc></url><url><loc>./docsresources/configuration_reference.html</loc></url><url><loc>./docssearch.html</loc></url></urlset>