mirror of
https://github.com/katanemo/plano.git
synced 2026-05-03 04:42:49 +02:00
deploy: b30ad791f7
This commit is contained in:
parent
f4b686c7fc
commit
3e881c6eec
28 changed files with 819 additions and 820 deletions
|
|
@ -1,4 +1,4 @@
|
|||
version: "0.1-beta"
|
||||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0 # or 127.0.0.1
|
||||
|
|
@ -8,9 +8,9 @@ listener:
|
|||
common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
|
||||
tls_certificates:
|
||||
- certificate_chain:
|
||||
filename: "/etc/certs/cert.pem"
|
||||
filename: /etc/certs/cert.pem
|
||||
private_key:
|
||||
filename: "/etc/certs/key.pem"
|
||||
filename: /etc/certs/key.pem
|
||||
|
||||
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
|
|
@ -18,42 +18,42 @@ endpoints:
|
|||
# value could be ip address or a hostname with port
|
||||
# this could also be a list of endpoints for load balancing
|
||||
# for example endpoint: [ ip1:port, ip2:port ]
|
||||
endpoint: "127.0.0.1:80"
|
||||
endpoint: 127.0.0.1:80
|
||||
# max time to wait for a connection to be established
|
||||
connect_timeout: 0.005s
|
||||
|
||||
mistral_local:
|
||||
endpoint: "127.0.0.1:8001"
|
||||
endpoint: 127.0.0.1:8001
|
||||
|
||||
error_target:
|
||||
endpoint: "error_target_1"
|
||||
endpoint: error_target_1
|
||||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
- name: "OpenAI"
|
||||
provider: "openai"
|
||||
access_key: $OPENAI_API_KEY
|
||||
- name: OpenAI
|
||||
provider: openai
|
||||
access_key: OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
stream: true
|
||||
rate_limits:
|
||||
selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
|
||||
http_header:
|
||||
name: "Authorization"
|
||||
name: Authorization
|
||||
value: "" # Empty value means each separate value has a separate limit
|
||||
limit:
|
||||
tokens: 100000 # Tokens per unit
|
||||
unit: "minute"
|
||||
unit: minute
|
||||
|
||||
- name: "Mistral8x7b"
|
||||
provider: "mistral"
|
||||
access_key: $MISTRAL_API_KEY
|
||||
model: "mistral-8x7b"
|
||||
- name: Mistral8x7b
|
||||
provider: mistral
|
||||
access_key: MISTRAL_API_KEY
|
||||
model: mistral-8x7b
|
||||
|
||||
- name: "MistralLocal7b"
|
||||
provider: "local"
|
||||
model: "mistral-7b-instruct"
|
||||
endpoint: "mistral_local"
|
||||
- name: MistralLocal7b
|
||||
provider: local
|
||||
model: mistral-7b-instruct
|
||||
endpoint: mistral_local
|
||||
|
||||
# provides a way to override default settings for the arch system
|
||||
overrides:
|
||||
|
|
@ -62,44 +62,41 @@ overrides:
|
|||
prompt_target_intent_matching_threshold: 0.60
|
||||
|
||||
# default system prompt used by all prompt targets
|
||||
system_prompt: |
|
||||
You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
|
||||
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
|
||||
|
||||
prompt_guards:
|
||||
input_guards:
|
||||
jailbreak:
|
||||
on_exception:
|
||||
message: "Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters."
|
||||
message: Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.
|
||||
|
||||
prompt_targets:
|
||||
- name: "reboot_network_device"
|
||||
description: "Helps network operators perform device operations like rebooting a device."
|
||||
endpoint:
|
||||
name: app_server
|
||||
path: "/agent/action"
|
||||
parameters:
|
||||
- name: "device_id"
|
||||
# additional type options include: int | float | bool | string | list | dict
|
||||
type: "string"
|
||||
description: "Identifier of the network device to reboot."
|
||||
required: true
|
||||
- name: "confirmation"
|
||||
type: "string"
|
||||
description: "Confirmation flag to proceed with reboot."
|
||||
default: "no"
|
||||
enum: [yes, no]
|
||||
|
||||
- name: "information_extraction"
|
||||
- name: information_extraction
|
||||
default: true
|
||||
description: "This prompt handles all scenarios that are question and answer in nature. Like summarization, information extraction, etc."
|
||||
description: handel all scenarios that are question and answer in nature. Like summarization, information extraction, etc.
|
||||
endpoint:
|
||||
name: app_server
|
||||
path: "/agent/summary"
|
||||
path: /agent/summary
|
||||
# Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
|
||||
auto_llm_dispatch_on_response: true
|
||||
# override system prompt for this prompt target
|
||||
system_prompt: |
|
||||
You are a helpful information extraction assistant. Use the information that is provided to you.
|
||||
system_prompt: You are a helpful information extraction assistant. Use the information that is provided to you.
|
||||
|
||||
- name: reboot_network_device
|
||||
description: Reboot a specific network device
|
||||
endpoint:
|
||||
name: app_server
|
||||
path: /agent/action
|
||||
parameters:
|
||||
- name: device_id
|
||||
type: str
|
||||
description: Identifier of the network device to reboot.
|
||||
required: true
|
||||
- name: confirmation
|
||||
type: bool
|
||||
description: Confirmation flag to proceed with reboot.
|
||||
default: false
|
||||
enum: [true, false]
|
||||
|
||||
error_target:
|
||||
endpoint:
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -160,7 +160,7 @@ you have the flexibility to support “agentic” apps tailored to specific use
|
|||
claims to creating ad campaigns - via prompts.</p>
|
||||
<p>Arch analyzes prompts, extracts critical information from prompts, engages in lightweight conversation with
|
||||
the user to gather any missing parameters and makes API calls so that you can focus on writing business logic.
|
||||
Arch does this via its purpose-built <a class="reference internal" href="../guides/function_calling.html#function-calling"><span class="std std-ref">Arch-FC LLM</span></a> - the fastest (200ms p90 - 10x faser than GPT-4o)
|
||||
Arch does this via its purpose-built <a class="reference internal" href="../guides/function_calling.html#function-calling"><span class="std std-ref">Arch-Function</span></a> - the fastest (200ms p90 - 10x faser than GPT-4o)
|
||||
and cheapest (100x than GPT-40) function-calling LLM that matches performance with frontier models.</p>
|
||||
<a class="reference internal image-reference" href="../_images/function-calling-flow.jpg"><img alt="../_images/function-calling-flow.jpg" class="align-center" src="../_images/function-calling-flow.jpg" style="width: 100%;"/>
|
||||
</a>
|
||||
|
|
@ -169,175 +169,171 @@ and cheapest (100x than GPT-40) function-calling LLM that matches performance wi
|
|||
<p>In the most common scenario, users will request a single action via prompts, and Arch efficiently processes the
|
||||
request by extracting relevant parameters, validating the input, and calling the designated function or API. Here
|
||||
is how you would go about enabling this scenario with Arch:</p>
|
||||
<section id="step-1-define-prompt-targets-with-functions">
|
||||
<h3>Step 1: Define prompt targets with functions<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-prompt-targets-with-functions" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-prompt-targets-with-functions'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<div class="literal-block-wrapper docutils container" id="id1">
|
||||
<div class="code-block-caption"><span class="caption-text">Define prompt targets that can enable users to engage with API and backened functions of an app</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span><span class="nt">listen</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
<section id="step-1-define-prompt-targets">
|
||||
<h3>Step 1: Define Prompt Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-prompt-targets" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-prompt-targets'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Prompt Target Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listen</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/pain Content-type in the http request</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span>
|
||||
</span><span id="line-16"><mark><span class="linenos">16</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</mark></span><span id="line-17"><mark><span class="linenos">17</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
|
||||
</mark></span><span id="line-18"><mark><span class="linenos">18</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">></span>
|
||||
</mark></span><span id="line-19"><mark><span class="linenos">19</span><span class="w"> </span><span class="no">This prompt target handles user requests to reboot devices.</span>
|
||||
</mark></span><span id="line-20"><mark><span class="linenos">20</span><span class="w"> </span><span class="no">It ensures that when users request to reboot specific devices or device groups, the system processes the reboot commands accurately.</span>
|
||||
</mark></span><span id="line-21"><mark><span class="linenos">21</span>
|
||||
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="no">**Examples of user prompts:**</span>
|
||||
</mark></span><span id="line-23"><mark><span class="linenos">23</span>
|
||||
</mark></span><span id="line-24"><mark><span class="linenos">24</span><span class="w"> </span><span class="no">- "Please reboot device 12345."</span>
|
||||
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="no">- "Restart all devices in tenant group tenant-XYZ</span>
|
||||
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="no">- "I need to reboot devices A, B, and C."</span>
|
||||
</mark></span><span id="line-27"><mark><span class="linenos">27</span>
|
||||
</mark></span><span id="line-28"><mark><span class="linenos">28</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
|
||||
</mark></span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_ids"</span>
|
||||
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
|
||||
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"A</span><span class="nv"> </span><span class="s">list</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">identifiers</span><span class="nv"> </span><span class="s">(IDs)</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_group"</span>
|
||||
</mark></span><span id="line-35"><mark><span class="linenos">35</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">string</span><span class="w"> </span><span class="c1"># Options: string | integer | float | list | dictionary | set</span>
|
||||
</mark></span><span id="line-36"><mark><span class="linenos">36</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"The</span><span class="nv"> </span><span class="s">name</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">group</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</mark></span><span id="line-37"><mark><span class="linenos">37</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</mark></span><span id="line-38"><span class="linenos">38</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-45"><span class="linenos">45</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
|
||||
</span><span id="line-46"><span class="linenos">46</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-47"><span class="linenos">47</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span>
|
||||
</span><span id="line-21"><mark><span class="linenos">21</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
|
||||
</mark></span><span id="line-23"><mark><span class="linenos">23</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot specific devices or device groups</span>
|
||||
</mark></span><span id="line-24"><mark><span class="linenos">24</span>
|
||||
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
|
||||
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</mark></span><span id="line-27"><mark><span class="linenos">27</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_ids</span>
|
||||
</mark></span><span id="line-28"><mark><span class="linenos">28</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span>
|
||||
</mark></span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">A list of device identifiers (IDs) to reboot.</span>
|
||||
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_group</span>
|
||||
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
|
||||
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">The name of the device group to reboot</span>
|
||||
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</mark></span><span id="line-35"><span class="linenos">35</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
|
||||
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="step-2-process-request-parameters-in-flask">
|
||||
<h3>Step 2: Process request parameters in Flask<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-process-request-parameters-in-flask" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-process-request-parameters-in-flask'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<section id="step-2-process-request-parameters">
|
||||
<h3>Step 2: Process Request Parameters<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-process-request-parameters" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-process-request-parameters'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Once the prompt targets are configured as above, handling those parameters is</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Parameter handling with Flask</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="literal-block-wrapper docutils container" id="id3">
|
||||
<div class="code-block-caption"><span class="caption-text">Parameter handling with Flask</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="kn">from</span> <span class="nn">flask</span> <span class="kn">import</span> <span class="n">Flask</span><span class="p">,</span> <span class="n">request</span><span class="p">,</span> <span class="n">jsonify</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="n">app</span> <span class="o">=</span> <span class="n">Flask</span><span class="p">(</span><span class="vm">__name__</span><span class="p">)</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s1">'/agent/device_summary'</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s1">'POST'</span><span class="p">])</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="k">def</span> <span class="nf">get_device_summary</span><span class="p">():</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="sd">"""</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span><span class="sd"> Endpoint to retrieve device statistics based on device IDs and an optional time range.</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="sd"> """</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span> <span class="c1"># Validate 'device_ids' parameter</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span> <span class="n">device_ids</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'device_ids'</span><span class="p">)</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">device_ids</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">device_ids</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s2">"'device_ids' parameter is required and must be a list"</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span> <span class="c1"># Validate 'time_range' parameter (optional, defaults to 7)</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span> <span class="n">time_range</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'time_range'</span><span class="p">,</span> <span class="mi">7</span><span class="p">)</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">time_range</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s2">"'time_range' must be an integer"</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span>
|
||||
</span><span id="line-22"><span class="linenos">22</span> <span class="c1"># Simulate retrieving statistics for the given device IDs and time range</span>
|
||||
</span><span id="line-23"><span class="linenos">23</span> <span class="c1"># In a real application, you would query your database or external service here</span>
|
||||
</span><span id="line-24"><span class="linenos">24</span> <span class="n">statistics</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
</span><span id="line-25"><span class="linenos">25</span> <span class="k">for</span> <span class="n">device_id</span> <span class="ow">in</span> <span class="n">device_ids</span><span class="p">:</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># Placeholder for actual data retrieval</span>
|
||||
</span><span id="line-27"><span class="linenos">27</span> <span class="n">stats</span> <span class="o">=</span> <span class="p">{</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span> <span class="s1">'device_id'</span><span class="p">:</span> <span class="n">device_id</span><span class="p">,</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span> <span class="s1">'time_range'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'Last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s1"> days'</span><span class="p">,</span>
|
||||
</span><span id="line-30"><span class="linenos">30</span> <span class="s1">'data'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'Statistics data for device </span><span class="si">{</span><span class="n">device_id</span><span class="si">}</span><span class="s1"> over the last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s1"> days.'</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span> <span class="p">}</span>
|
||||
</span><span id="line-32"><span class="linenos">32</span> <span class="n">statistics</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stats</span><span class="p">)</span>
|
||||
</span><span id="line-33"><span class="linenos">33</span>
|
||||
</span><span id="line-34"><span class="linenos">34</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span>
|
||||
</span><span id="line-35"><span class="linenos">35</span> <span class="s1">'statistics'</span><span class="p">:</span> <span class="n">statistics</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span> <span class="p">}</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span> <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s2">"/agent/device_summary"</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s2">"POST"</span><span class="p">])</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="k">def</span> <span class="nf">get_device_summary</span><span class="p">():</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="sd">"""</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="sd"> Endpoint to retrieve device statistics based on device IDs and an optional time range.</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="sd"> """</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span> <span class="c1"># Validate 'device_ids' parameter</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span> <span class="n">device_ids</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"device_ids"</span><span class="p">)</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">device_ids</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">device_ids</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span> <span class="p">{</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"'device_ids' parameter is required and must be a list"</span><span class="p">}</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span> <span class="p">),</span> <span class="mi">400</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span> <span class="c1"># Validate 'time_range' parameter (optional, defaults to 7)</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span> <span class="n">time_range</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"time_range"</span><span class="p">,</span> <span class="mi">7</span><span class="p">)</span>
|
||||
</span><span id="line-22"><span class="linenos">22</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">time_range</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
</span><span id="line-23"><span class="linenos">23</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"'time_range' must be an integer"</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-24"><span class="linenos">24</span>
|
||||
</span><span id="line-25"><span class="linenos">25</span> <span class="c1"># Simulate retrieving statistics for the given device IDs and time range</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># In a real application, you would query your database or external service here</span>
|
||||
</span><span id="line-27"><span class="linenos">27</span> <span class="n">statistics</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span> <span class="k">for</span> <span class="n">device_id</span> <span class="ow">in</span> <span class="n">device_ids</span><span class="p">:</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span> <span class="c1"># Placeholder for actual data retrieval</span>
|
||||
</span><span id="line-30"><span class="linenos">30</span> <span class="n">stats</span> <span class="o">=</span> <span class="p">{</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span> <span class="s2">"device_id"</span><span class="p">:</span> <span class="n">device_id</span><span class="p">,</span>
|
||||
</span><span id="line-32"><span class="linenos">32</span> <span class="s2">"time_range"</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"Last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s2"> days"</span><span class="p">,</span>
|
||||
</span><span id="line-33"><span class="linenos">33</span> <span class="s2">"data"</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"Statistics data for device </span><span class="si">{</span><span class="n">device_id</span><span class="si">}</span><span class="s2"> over the last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s2"> days."</span><span class="p">,</span>
|
||||
</span><span id="line-34"><span class="linenos">34</span> <span class="p">}</span>
|
||||
</span><span id="line-35"><span class="linenos">35</span> <span class="n">statistics</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stats</span><span class="p">)</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"statistics"</span><span class="p">:</span> <span class="n">statistics</span><span class="p">}</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span>
|
||||
</span><span id="line-42"><span class="linenos">42</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
|
||||
</span><span id="line-43"><span class="linenos">43</span> <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="parallel-multiple-function-calling">
|
||||
<h2>Parallel/ Multiple Function Calling<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#parallel-multiple-function-calling" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#parallel-multiple-function-calling'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<h2>Parallel & Multiple Function Calling<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#parallel-multiple-function-calling" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#parallel-multiple-function-calling'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>In more complex use cases, users may request multiple actions or need multiple APIs/functions to be called
|
||||
simultaneously or sequentially. With Arch, you can handle these scenarios efficiently using parallel or multiple
|
||||
function calling. This allows your application to engage in a broader range of interactions, such as updating
|
||||
different datasets, triggering events across systems, or collecting results from multiple services in one prompt.</p>
|
||||
<p>Arch-FC1B is built to manage these parallel tasks efficiently, ensuring low latency and high throughput, even
|
||||
when multiple functions are invoked. It provides two mechanisms to handle these cases:</p>
|
||||
<section id="step-1-define-multiple-function-targets">
|
||||
<h3>Step 1: Define Multiple Function Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-multiple-function-targets" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-multiple-function-targets'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<section id="id1">
|
||||
<h3>Step 1: Define Prompt Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#id1'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>When enabling multiple function calling, define the prompt targets in a way that supports multiple functions or
|
||||
API calls based on the user’s prompt. These targets can be triggered in parallel or sequentially, depending on
|
||||
the user’s intent.</p>
|
||||
<p>Example of Multiple Prompt Targets in YAML:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id3">
|
||||
<div class="code-block-caption"><span class="caption-text">Define prompt targets that can enable users to engage with API and backened functions of an app</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span><span class="nt">listen</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
<div class="literal-block-wrapper docutils container" id="id4">
|
||||
<div class="code-block-caption"><span class="caption-text">Prompt Target Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listen</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/pain Content-type in the http request</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span>
|
||||
</span><span id="line-16"><mark><span class="linenos">16</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</mark></span><span id="line-17"><mark><span class="linenos">17</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
|
||||
</mark></span><span id="line-18"><mark><span class="linenos">18</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">></span>
|
||||
</mark></span><span id="line-19"><mark><span class="linenos">19</span><span class="w"> </span><span class="no">This prompt target handles user requests to reboot devices.</span>
|
||||
</mark></span><span id="line-20"><mark><span class="linenos">20</span><span class="w"> </span><span class="no">It ensures that when users request to reboot specific devices or device groups, the system processes the reboot commands accurately.</span>
|
||||
</mark></span><span id="line-21"><mark><span class="linenos">21</span>
|
||||
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="no">**Examples of user prompts:**</span>
|
||||
</mark></span><span id="line-23"><mark><span class="linenos">23</span>
|
||||
</mark></span><span id="line-24"><mark><span class="linenos">24</span><span class="w"> </span><span class="no">- "Please reboot device 12345."</span>
|
||||
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="no">- "Restart all devices in tenant group tenant-XYZ</span>
|
||||
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="no">- "I need to reboot devices A, B, and C."</span>
|
||||
</mark></span><span id="line-27"><mark><span class="linenos">27</span>
|
||||
</mark></span><span id="line-28"><mark><span class="linenos">28</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
|
||||
</mark></span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_ids"</span>
|
||||
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
|
||||
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"A</span><span class="nv"> </span><span class="s">list</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">identifiers</span><span class="nv"> </span><span class="s">(IDs)</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_group"</span>
|
||||
</mark></span><span id="line-35"><mark><span class="linenos">35</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">string</span><span class="w"> </span><span class="c1"># Options: string | integer | float | list | dictionary | set</span>
|
||||
</mark></span><span id="line-36"><mark><span class="linenos">36</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"The</span><span class="nv"> </span><span class="s">name</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">group</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</mark></span><span id="line-37"><mark><span class="linenos">37</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</mark></span><span id="line-38"><span class="linenos">38</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-45"><span class="linenos">45</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
|
||||
</span><span id="line-46"><span class="linenos">46</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-47"><span class="linenos">47</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span>
|
||||
</span><span id="line-21"><mark><span class="linenos">21</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
|
||||
</mark></span><span id="line-23"><mark><span class="linenos">23</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot specific devices or device groups</span>
|
||||
</mark></span><span id="line-24"><mark><span class="linenos">24</span>
|
||||
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
|
||||
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</mark></span><span id="line-27"><mark><span class="linenos">27</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_ids</span>
|
||||
</mark></span><span id="line-28"><mark><span class="linenos">28</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span>
|
||||
</mark></span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">A list of device identifiers (IDs) to reboot.</span>
|
||||
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_group</span>
|
||||
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
|
||||
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">The name of the device group to reboot</span>
|
||||
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</mark></span><span id="line-35"><span class="linenos">35</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
|
||||
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -365,12 +361,12 @@ the user’s intent.</p>
|
|||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
<li><a :data-current="activeSection === '#single-function-call'" class="reference internal" href="#single-function-call">Single Function Call</a><ul>
|
||||
<li><a :data-current="activeSection === '#step-1-define-prompt-targets-with-functions'" class="reference internal" href="#step-1-define-prompt-targets-with-functions">Step 1: Define prompt targets with functions</a></li>
|
||||
<li><a :data-current="activeSection === '#step-2-process-request-parameters-in-flask'" class="reference internal" href="#step-2-process-request-parameters-in-flask">Step 2: Process request parameters in Flask</a></li>
|
||||
<li><a :data-current="activeSection === '#step-1-define-prompt-targets'" class="reference internal" href="#step-1-define-prompt-targets">Step 1: Define Prompt Targets</a></li>
|
||||
<li><a :data-current="activeSection === '#step-2-process-request-parameters'" class="reference internal" href="#step-2-process-request-parameters">Step 2: Process Request Parameters</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a :data-current="activeSection === '#parallel-multiple-function-calling'" class="reference internal" href="#parallel-multiple-function-calling">Parallel/ Multiple Function Calling</a><ul>
|
||||
<li><a :data-current="activeSection === '#step-1-define-multiple-function-targets'" class="reference internal" href="#step-1-define-multiple-function-targets">Step 1: Define Multiple Function Targets</a></li>
|
||||
<li><a :data-current="activeSection === '#parallel-multiple-function-calling'" class="reference internal" href="#parallel-multiple-function-calling">Parallel & Multiple Function Calling</a><ul>
|
||||
<li><a :data-current="activeSection === '#id1'" class="reference internal" href="#id1">Step 1: Define Prompt Targets</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -157,49 +157,72 @@
|
|||
Retrieval-Augmented Generation (RAG) applications.</p>
|
||||
<section id="intent-drift-detection">
|
||||
<h2>Intent-drift Detection<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#intent-drift-detection" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#intent-drift-detection'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>Developers struggle to handle <a class="reference external" href="https://www.reddit.com/r/ChatGPTPromptGenius/comments/17dzmpy/how_to_use_rag_with_conversation_history_for/?" rel="nofollow noopener">follow-up<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>
|
||||
or <a class="reference external" href="https://www.reddit.com/r/LocalLLaMA/comments/18mqwg6/best_practice_for_rag_with_followup_chat/" rel="nofollow noopener">clarifying<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>
|
||||
questions. Specifically, when users ask for changes or additions to previous responses their AI applications often
|
||||
generate entirely new responses instead of adjusting previous ones. Arch offers <em>intent-drift</em> tracking as a feature so
|
||||
that developers can know when the user has shifted away from a previous intent so that they can dramatically improve
|
||||
retrieval accuracy, lower overall token cost and improve the speed of their responses back to users.</p>
|
||||
<p>Developers struggle to handle <code class="docutils literal notranslate"><span class="pre">follow-up</span></code> or <code class="docutils literal notranslate"><span class="pre">clarification</span></code> questions.
|
||||
Specifically, when users ask for changes or additions to previous responses their AI applications often generate entirely new responses instead of adjusting previous ones.
|
||||
Arch offers <strong>intent-drift</strong> tracking as a feature so that developers can know when the user has shifted away from a previous intent so that they can dramatically improve retrieval accuracy, lower overall token cost and improve the speed of their responses back to users.</p>
|
||||
<p>Arch uses its built-in lightweight NLI and embedding models to know if the user has steered away from an active intent.
|
||||
Arch’s intent-drift detection mechanism is based on its’ <em>prompt_targets</em> primtive. Arch tries to match an incoming
|
||||
prompt to one of the <em>prompt_targets</em> configured in the gateway. Once it detects that the user has moved away from an active
|
||||
Arch’s intent-drift detection mechanism is based on its’ <a class="reference internal" href="../concepts/prompt_target.html#prompt-target"><span class="std std-ref">prompt_targets</span></a> primtive. Arch tries to match an incoming
|
||||
prompt to one of the prompt_targets configured in the gateway. Once it detects that the user has moved away from an active
|
||||
active intent, Arch adds the <code class="docutils literal notranslate"><span class="pre">x-arch-intent-drift</span></code> headers to the request before sending it your application servers.</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id1">
|
||||
<div class="code-block-caption"><span class="caption-text">Intent Detection Example</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s1">'/process_rag'</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s1">'POST'</span><span class="p">])</span>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s2">"/process_rag"</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s2">"POST"</span><span class="p">])</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span><span class="k">def</span> <span class="nf">process_rag</span><span class="p">():</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span> <span class="c1"># Extract JSON data from the request</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span> <span class="n">user_id</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'user_id'</span><span class="p">)</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span> <span class="n">user_id</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"user_id"</span><span class="p">)</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">user_id</span><span class="p">:</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s1">'User ID is required'</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"User ID is required"</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span> <span class="n">client_messages</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'messages'</span><span class="p">)</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span> <span class="n">client_messages</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"messages"</span><span class="p">)</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">client_messages</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">client_messages</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s1">'Messages array is required'</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"Messages array is required"</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span>
|
||||
</span><span id="line-14"><mark><span class="linenos">14</span> <span class="c1"># Extract the intent change marker from Arch's headers if present for the current prompt</span>
|
||||
</mark></span><span id="line-15"><mark><span class="linenos">15</span> <span class="n">intent_changed_header</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'x-arch-intent-marker'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
|
||||
</mark></span><span id="line-16"><mark><span class="linenos">16</span> <span class="k">if</span> <span class="n">intent_changed_header</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">''</span><span class="p">,</span> <span class="s1">'false'</span><span class="p">]:</span>
|
||||
</mark></span><span id="line-15"><mark><span class="linenos">15</span> <span class="n">intent_changed_header</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"x-arch-intent-marker"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
|
||||
</mark></span><span id="line-16"><mark><span class="linenos">16</span> <span class="k">if</span> <span class="n">intent_changed_header</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">""</span><span class="p">,</span> <span class="s2">"false"</span><span class="p">]:</span>
|
||||
</mark></span><span id="line-17"><mark><span class="linenos">17</span> <span class="n">intent_changed</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
</mark></span><span id="line-18"><mark><span class="linenos">18</span> <span class="k">elif</span> <span class="n">intent_changed_header</span> <span class="o">==</span> <span class="s1">'true'</span><span class="p">:</span>
|
||||
</mark></span><span id="line-18"><mark><span class="linenos">18</span> <span class="k">elif</span> <span class="n">intent_changed_header</span> <span class="o">==</span> <span class="s2">"true"</span><span class="p">:</span>
|
||||
</mark></span><span id="line-19"><mark><span class="linenos">19</span> <span class="n">intent_changed</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
</mark></span><span id="line-20"><mark><span class="linenos">20</span> <span class="k">else</span><span class="p">:</span>
|
||||
</mark></span><span id="line-21"><mark><span class="linenos">21</span> <span class="c1"># Invalid value provided</span>
|
||||
</mark></span><span id="line-22"><mark><span class="linenos">22</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s1">'Invalid value for x-arch-prompt-intent-change header'</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</mark></span><span id="line-23"><span class="linenos">23</span>
|
||||
</span><span id="line-24"><span class="linenos">24</span> <span class="c1"># Update user conversation based on intent change</span>
|
||||
</span><span id="line-25"><span class="linenos">25</span> <span class="n">memory</span> <span class="o">=</span> <span class="n">update_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">,</span> <span class="n">client_messages</span><span class="p">,</span> <span class="n">intent_changed</span><span class="p">)</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span>
|
||||
</span><span id="line-27"><span class="linenos">27</span> <span class="c1"># Retrieve messages since last intent change for LLM</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span> <span class="n">messages_for_llm</span> <span class="o">=</span> <span class="n">get_messages_since_last_intent</span><span class="p">(</span><span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">messages</span><span class="p">)</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span>
|
||||
</span><span id="line-30"><span class="linenos">30</span> <span class="c1"># Forward messages to upstream LLM</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span> <span class="n">llm_response</span> <span class="o">=</span> <span class="n">forward_to_llm</span><span class="p">(</span><span class="n">messages_for_llm</span><span class="p">)</span>
|
||||
</mark></span><span id="line-22"><mark><span class="linenos">22</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span>
|
||||
</mark></span><span id="line-23"><mark><span class="linenos">23</span> <span class="p">{</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"Invalid value for x-arch-prompt-intent-change header"</span><span class="p">}</span>
|
||||
</mark></span><span id="line-24"><mark><span class="linenos">24</span> <span class="p">),</span> <span class="mi">400</span>
|
||||
</mark></span><span id="line-25"><span class="linenos">25</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># Update user conversation based on intent change</span>
|
||||
</span><span id="line-27"><span class="linenos">27</span> <span class="n">memory</span> <span class="o">=</span> <span class="n">update_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">,</span> <span class="n">client_messages</span><span class="p">,</span> <span class="n">intent_changed</span><span class="p">)</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span> <span class="c1"># Retrieve messages since last intent change for LLM</span>
|
||||
</span><span id="line-30"><span class="linenos">30</span> <span class="n">messages_for_llm</span> <span class="o">=</span> <span class="n">get_messages_since_last_intent</span><span class="p">(</span><span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">messages</span><span class="p">)</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span>
|
||||
</span><span id="line-32"><span class="linenos">32</span> <span class="c1"># Forward messages to upstream LLM</span>
|
||||
</span><span id="line-33"><span class="linenos">33</span> <span class="n">llm_response</span> <span class="o">=</span> <span class="n">forward_to_llm</span><span class="p">(</span><span class="n">messages_for_llm</span><span class="p">)</span>
|
||||
</span><span id="line-34"><span class="linenos">34</span>
|
||||
</span><span id="line-35"><span class="linenos">35</span> <span class="c1"># Prepare the messages to return</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span> <span class="n">messages_to_return</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span> <span class="k">for</span> <span class="n">message</span> <span class="ow">in</span> <span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">messages</span><span class="p">:</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span> <span class="n">role</span> <span class="o">=</span> <span class="s2">"user"</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">message</span><span class="p">,</span> <span class="n">HumanMessage</span><span class="p">)</span> <span class="k">else</span> <span class="s2">"assistant"</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span> <span class="n">content</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">content</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span> <span class="n">metadata</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">additional_kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"metadata"</span><span class="p">,</span> <span class="p">{})</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span> <span class="n">message_entry</span> <span class="o">=</span> <span class="p">{</span>
|
||||
</span><span id="line-42"><span class="linenos">42</span> <span class="s2">"uuid"</span><span class="p">:</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"uuid"</span><span class="p">),</span>
|
||||
</span><span id="line-43"><span class="linenos">43</span> <span class="s2">"timestamp"</span><span class="p">:</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"timestamp"</span><span class="p">),</span>
|
||||
</span><span id="line-44"><span class="linenos">44</span> <span class="s2">"role"</span><span class="p">:</span> <span class="n">role</span><span class="p">,</span>
|
||||
</span><span id="line-45"><span class="linenos">45</span> <span class="s2">"content"</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||||
</span><span id="line-46"><span class="linenos">46</span> <span class="s2">"intent_changed"</span><span class="p">:</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"intent_changed"</span><span class="p">,</span> <span class="kc">False</span><span class="p">),</span>
|
||||
</span><span id="line-47"><span class="linenos">47</span> <span class="p">}</span>
|
||||
</span><span id="line-48"><span class="linenos">48</span> <span class="n">messages_to_return</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">message_entry</span><span class="p">)</span>
|
||||
</span><span id="line-49"><span class="linenos">49</span>
|
||||
</span><span id="line-50"><span class="linenos">50</span> <span class="c1"># Prepare the response</span>
|
||||
</span><span id="line-51"><span class="linenos">51</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span>
|
||||
</span><span id="line-52"><span class="linenos">52</span> <span class="s2">"user_id"</span><span class="p">:</span> <span class="n">user_id</span><span class="p">,</span>
|
||||
</span><span id="line-53"><span class="linenos">53</span> <span class="s2">"messages"</span><span class="p">:</span> <span class="n">messages_to_return</span><span class="p">,</span>
|
||||
</span><span id="line-54"><span class="linenos">54</span> <span class="s2">"llm_response"</span><span class="p">:</span> <span class="n">llm_response</span><span class="p">,</span>
|
||||
</span><span id="line-55"><span class="linenos">55</span> <span class="p">}</span>
|
||||
</span><span id="line-56"><span class="linenos">56</span>
|
||||
</span><span id="line-57"><span class="linenos">57</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -224,20 +247,20 @@ so that you can use the most relevant prompts for your retrieval and for prompti
|
|||
</span><span id="line-10"><span class="linenos">10</span><span class="c1"># Global dictionary to keep track of user memories</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="n">user_memories</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="k">def</span> <span class="nf">get_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">):</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="sd">"""</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span><span class="sd"> Retrieve the user's conversation memory using LangChain.</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span><span class="sd"> If the user does not exist, initialize their conversation memory.</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span><span class="sd"> """</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span> <span class="k">if</span> <span class="n">user_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">user_memories</span><span class="p">:</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span> <span class="n">user_memories</span><span class="p">[</span><span class="n">user_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">ConversationBufferMemory</span><span class="p">(</span><span class="n">return_messages</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span> <span class="k">return</span> <span class="n">user_memories</span><span class="p">[</span><span class="n">user_id</span><span class="p">]</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="k">def</span> <span class="nf">get_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">):</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="sd">"""</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span><span class="sd"> Retrieve the user's conversation memory using LangChain.</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span><span class="sd"> If the user does not exist, initialize their conversation memory.</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span><span class="sd"> """</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span> <span class="k">if</span> <span class="n">user_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">user_memories</span><span class="p">:</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span> <span class="n">user_memories</span><span class="p">[</span><span class="n">user_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">ConversationBufferMemory</span><span class="p">(</span><span class="n">return_messages</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span> <span class="k">return</span> <span class="n">user_memories</span><span class="p">[</span><span class="n">user_id</span><span class="p">]</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="step-2-update-conversationbuffermemory-w-intent">
|
||||
<h3>Step 2: Update ConversationBufferMemory w/ intent<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-update-conversationbuffermemory-w-intent" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-update-conversationbuffermemory-w-intent'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<section id="step-2-update-conversationbuffermemory-with-intents">
|
||||
<h3>Step 2: Update ConversationBufferMemory with Intents<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-update-conversationbuffermemory-with-intents" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-update-conversationbuffermemory-with-intents'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="k">def</span> <span class="nf">update_user_conversation</span><span class="p">(</span><span class="n">user_id</span><span class="p">,</span> <span class="n">client_messages</span><span class="p">,</span> <span class="n">intent_changed</span><span class="p">):</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span><span class="w"> </span><span class="sd">"""</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="sd"> Update the user's conversation memory with new messages using LangChain.</span>
|
||||
|
|
@ -253,26 +276,26 @@ so that you can use the most relevant prompts for your retrieval and for prompti
|
|||
</span><span id="line-13"><span class="linenos">13</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span> <span class="c1"># Process each new message</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span> <span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">message</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">new_messages</span><span class="p">):</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span> <span class="n">role</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'role'</span><span class="p">)</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span> <span class="n">content</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'content'</span><span class="p">)</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span> <span class="n">role</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"role"</span><span class="p">)</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span> <span class="n">content</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"content"</span><span class="p">)</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span> <span class="n">metadata</span> <span class="o">=</span> <span class="p">{</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span> <span class="s1">'uuid'</span><span class="p">:</span> <span class="nb">str</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()),</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span> <span class="s1">'timestamp'</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">utcnow</span><span class="p">()</span><span class="o">.</span><span class="n">isoformat</span><span class="p">(),</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span> <span class="s1">'intent_changed'</span><span class="p">:</span> <span class="kc">False</span> <span class="c1"># Default value</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span> <span class="s2">"uuid"</span><span class="p">:</span> <span class="nb">str</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()),</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span> <span class="s2">"timestamp"</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">utcnow</span><span class="p">()</span><span class="o">.</span><span class="n">isoformat</span><span class="p">(),</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span> <span class="s2">"intent_changed"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="c1"># Default value</span>
|
||||
</span><span id="line-22"><span class="linenos">22</span> <span class="p">}</span>
|
||||
</span><span id="line-23"><span class="linenos">23</span>
|
||||
</span><span id="line-24"><span class="linenos">24</span> <span class="c1"># Mark the intent change on the last message if detected</span>
|
||||
</span><span id="line-25"><span class="linenos">25</span> <span class="k">if</span> <span class="n">intent_changed</span> <span class="ow">and</span> <span class="n">index</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">new_messages</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'intent_changed'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">"intent_changed"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
</span><span id="line-27"><span class="linenos">27</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span> <span class="c1"># Create a new message with metadata</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span> <span class="k">if</span> <span class="n">role</span> <span class="o">==</span> <span class="s1">'user'</span><span class="p">:</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span> <span class="k">if</span> <span class="n">role</span> <span class="o">==</span> <span class="s2">"user"</span><span class="p">:</span>
|
||||
</span><span id="line-30"><span class="linenos">30</span> <span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">add_message</span><span class="p">(</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span> <span class="n">HumanMessage</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">content</span><span class="p">,</span> <span class="n">additional_kwargs</span><span class="o">=</span><span class="p">{</span><span class="s1">'metadata'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">})</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span> <span class="n">HumanMessage</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">content</span><span class="p">,</span> <span class="n">additional_kwargs</span><span class="o">=</span><span class="p">{</span><span class="s2">"metadata"</span><span class="p">:</span> <span class="n">metadata</span><span class="p">})</span>
|
||||
</span><span id="line-32"><span class="linenos">32</span> <span class="p">)</span>
|
||||
</span><span id="line-33"><span class="linenos">33</span> <span class="k">elif</span> <span class="n">role</span> <span class="o">==</span> <span class="s1">'assistant'</span><span class="p">:</span>
|
||||
</span><span id="line-33"><span class="linenos">33</span> <span class="k">elif</span> <span class="n">role</span> <span class="o">==</span> <span class="s2">"assistant"</span><span class="p">:</span>
|
||||
</span><span id="line-34"><span class="linenos">34</span> <span class="n">memory</span><span class="o">.</span><span class="n">chat_memory</span><span class="o">.</span><span class="n">add_message</span><span class="p">(</span>
|
||||
</span><span id="line-35"><span class="linenos">35</span> <span class="n">AIMessage</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">content</span><span class="p">,</span> <span class="n">additional_kwargs</span><span class="o">=</span><span class="p">{</span><span class="s1">'metadata'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">})</span>
|
||||
</span><span id="line-35"><span class="linenos">35</span> <span class="n">AIMessage</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">content</span><span class="p">,</span> <span class="n">additional_kwargs</span><span class="o">=</span><span class="p">{</span><span class="s2">"metadata"</span><span class="p">:</span> <span class="n">metadata</span><span class="p">})</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span> <span class="p">)</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span> <span class="k">else</span><span class="p">:</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span> <span class="c1"># Handle other roles if necessary</span>
|
||||
|
|
@ -292,11 +315,12 @@ so that you can use the most relevant prompts for your retrieval and for prompti
|
|||
</span><span id="line-6"><span class="linenos"> 6</span> <span class="k">for</span> <span class="n">message</span> <span class="ow">in</span> <span class="nb">reversed</span><span class="p">(</span><span class="n">messages</span><span class="p">):</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span> <span class="c1"># Insert message at the beginning to maintain correct order</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span> <span class="n">messages_since_intent</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">message</span><span class="p">)</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span> <span class="n">metadata</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">additional_kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'metadata'</span><span class="p">,</span> <span class="p">{})</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span> <span class="n">metadata</span> <span class="o">=</span> <span class="n">message</span><span class="o">.</span><span class="n">additional_kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"metadata"</span><span class="p">,</span> <span class="p">{})</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span> <span class="c1"># Break if intent_changed is True</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span> <span class="k">if</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'intent_changed'</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> <span class="o">==</span> <span class="kc">True</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span> <span class="k">if</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"intent_changed"</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> <span class="o">==</span> <span class="kc">True</span><span class="p">:</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span> <span class="k">break</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span> <span class="k">return</span> <span class="n">messages_since_intent</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span> <span class="k">return</span> <span class="n">messages_since_intent</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p>You can used the last set of messages that match to an intent to prompt an LLM, use it with an vector-DB for
|
||||
|
|
@ -311,37 +335,31 @@ enabling Arch to retrieve critical information in a structured way for processin
|
|||
retrieval quality and speed of your application. By extracting parameters from the conversation, you can pull
|
||||
the appropriate chunks from a vector database or SQL-like data store to enhance accuracy. With Arch, you can
|
||||
streamline data retrieval and processing to build more efficient and precise RAG applications.</p>
|
||||
<section id="step-1-define-prompt-targets-with-parameter-definitions">
|
||||
<h3>Step 1: Define prompt targets with parameter definitions<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-prompt-targets-with-parameter-definitions" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-prompt-targets-with-parameter-definitions'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<section id="step-1-define-prompt-targets">
|
||||
<h3>Step 1: Define Prompt Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-prompt-targets" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-prompt-targets'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Prompt Targets</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">get_device_statistics</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">></span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="no">This prompt target ensures that when users request device-related statistics, the system accurately retrieves and presents the relevant data</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span><span class="w"> </span><span class="no">based on the specified devices and time range. Examples of user prompts, include:</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="no">- "Show me the performance stats for device 12345 over the past week."</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="no">- "What are the error rates for my devices in the last 24 hours?"</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="w"> </span><span class="no">- "I need statistics on device 789 over the last 10 days."</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_summary</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_ids"</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"A</span><span class="nv"> </span><span class="s">list</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">identifiers</span><span class="nv"> </span><span class="s">(IDs)</span><span class="nv"> </span><span class="s">for</span><span class="nv"> </span><span class="s">which</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">statistics</span><span class="nv"> </span><span class="s">are</span><span class="nv"> </span><span class="s">requested."</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"time_range"</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">integer</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"The</span><span class="nv"> </span><span class="s">number</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">days</span><span class="nv"> </span><span class="s">in</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">past</span><span class="nv"> </span><span class="s">over</span><span class="nv"> </span><span class="s">which</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">retrieve</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">statistics.</span><span class="nv"> </span><span class="s">Defaults</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">7</span><span class="nv"> </span><span class="s">days</span><span class="nv"> </span><span class="s">if</span><span class="nv"> </span><span class="s">not</span><span class="nv"> </span><span class="s">specified."</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">7</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Retrieve and present the relevant data based on the specified devices and time range</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_summary</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_ids</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">A list of device identifiers (IDs) to reboot.</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">time_range</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">int</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">The number of days in the past over which to retrieve device statistics</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">7</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="step-2-process-request-parameters-in-flask">
|
||||
<h3>Step 2: Process request parameters in Flask<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-process-request-parameters-in-flask" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-process-request-parameters-in-flask'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<h3>Step 2: Process Request Parameters in Flask<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-process-request-parameters-in-flask" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-process-request-parameters-in-flask'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Once the prompt targets are configured as above, handling those parameters is</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id3">
|
||||
<div class="code-block-caption"><span class="caption-text">Parameter handling with Flask</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
|
|
@ -349,43 +367,45 @@ streamline data retrieval and processing to build more efficient and precise RAG
|
|||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="n">app</span> <span class="o">=</span> <span class="n">Flask</span><span class="p">(</span><span class="vm">__name__</span><span class="p">)</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s1">'/agent/device_summary'</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s1">'POST'</span><span class="p">])</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="k">def</span> <span class="nf">get_device_summary</span><span class="p">():</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="sd">"""</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span><span class="sd"> Endpoint to retrieve device statistics based on device IDs and an optional time range.</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="sd"> """</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span> <span class="c1"># Validate 'device_ids' parameter</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span> <span class="n">device_ids</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'device_ids'</span><span class="p">)</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">device_ids</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">device_ids</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s2">"'device_ids' parameter is required and must be a list"</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span> <span class="c1"># Validate 'time_range' parameter (optional, defaults to 7)</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span> <span class="n">time_range</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'time_range'</span><span class="p">,</span> <span class="mi">7</span><span class="p">)</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">time_range</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s1">'error'</span><span class="p">:</span> <span class="s2">"'time_range' must be an integer"</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span>
|
||||
</span><span id="line-22"><span class="linenos">22</span> <span class="c1"># Simulate retrieving statistics for the given device IDs and time range</span>
|
||||
</span><span id="line-23"><span class="linenos">23</span> <span class="c1"># In a real application, you would query your database or external service here</span>
|
||||
</span><span id="line-24"><span class="linenos">24</span> <span class="n">statistics</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
</span><span id="line-25"><span class="linenos">25</span> <span class="k">for</span> <span class="n">device_id</span> <span class="ow">in</span> <span class="n">device_ids</span><span class="p">:</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># Placeholder for actual data retrieval</span>
|
||||
</span><span id="line-27"><span class="linenos">27</span> <span class="n">stats</span> <span class="o">=</span> <span class="p">{</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span> <span class="s1">'device_id'</span><span class="p">:</span> <span class="n">device_id</span><span class="p">,</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span> <span class="s1">'time_range'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'Last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s1"> days'</span><span class="p">,</span>
|
||||
</span><span id="line-30"><span class="linenos">30</span> <span class="s1">'data'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'Statistics data for device </span><span class="si">{</span><span class="n">device_id</span><span class="si">}</span><span class="s1"> over the last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s1"> days.'</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span> <span class="p">}</span>
|
||||
</span><span id="line-32"><span class="linenos">32</span> <span class="n">statistics</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stats</span><span class="p">)</span>
|
||||
</span><span id="line-33"><span class="linenos">33</span>
|
||||
</span><span id="line-34"><span class="linenos">34</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span>
|
||||
</span><span id="line-35"><span class="linenos">35</span> <span class="s1">'statistics'</span><span class="p">:</span> <span class="n">statistics</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span> <span class="p">}</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span> <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
</span><span id="line-5"><span class="linenos"> 5</span>
|
||||
</span><span id="line-6"><span class="linenos"> 6</span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s2">"/agent/device_summary"</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s2">"POST"</span><span class="p">])</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="k">def</span> <span class="nf">get_device_summary</span><span class="p">():</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="sd">"""</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="sd"> Endpoint to retrieve device statistics based on device IDs and an optional time range.</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="sd"> """</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span> <span class="n">data</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">get_json</span><span class="p">()</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span> <span class="c1"># Validate 'device_ids' parameter</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span> <span class="n">device_ids</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"device_ids"</span><span class="p">)</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">device_ids</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">device_ids</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span> <span class="p">{</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"'device_ids' parameter is required and must be a list"</span><span class="p">}</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span> <span class="p">),</span> <span class="mi">400</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span> <span class="c1"># Validate 'time_range' parameter (optional, defaults to 7)</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span> <span class="n">time_range</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"time_range"</span><span class="p">,</span> <span class="mi">7</span><span class="p">)</span>
|
||||
</span><span id="line-22"><span class="linenos">22</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">time_range</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
</span><span id="line-23"><span class="linenos">23</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">"error"</span><span class="p">:</span> <span class="s2">"'time_range' must be an integer"</span><span class="p">}),</span> <span class="mi">400</span>
|
||||
</span><span id="line-24"><span class="linenos">24</span>
|
||||
</span><span id="line-25"><span class="linenos">25</span> <span class="c1"># Simulate retrieving statistics for the given device IDs and time range</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span> <span class="c1"># In a real application, you would query your database or external service here</span>
|
||||
</span><span id="line-27"><span class="linenos">27</span> <span class="n">statistics</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span> <span class="k">for</span> <span class="n">device_id</span> <span class="ow">in</span> <span class="n">device_ids</span><span class="p">:</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span> <span class="c1"># Placeholder for actual data retrieval</span>
|
||||
</span><span id="line-30"><span class="linenos">30</span> <span class="n">stats</span> <span class="o">=</span> <span class="p">{</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span> <span class="s2">"device_id"</span><span class="p">:</span> <span class="n">device_id</span><span class="p">,</span>
|
||||
</span><span id="line-32"><span class="linenos">32</span> <span class="s2">"time_range"</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"Last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s2"> days"</span><span class="p">,</span>
|
||||
</span><span id="line-33"><span class="linenos">33</span> <span class="s2">"data"</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"Statistics data for device </span><span class="si">{</span><span class="n">device_id</span><span class="si">}</span><span class="s2"> over the last </span><span class="si">{</span><span class="n">time_range</span><span class="si">}</span><span class="s2"> days."</span><span class="p">,</span>
|
||||
</span><span id="line-34"><span class="linenos">34</span> <span class="p">}</span>
|
||||
</span><span id="line-35"><span class="linenos">35</span> <span class="n">statistics</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stats</span><span class="p">)</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span> <span class="n">response</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"statistics"</span><span class="p">:</span> <span class="n">statistics</span><span class="p">}</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span> <span class="k">return</span> <span class="n">jsonify</span><span class="p">(</span><span class="n">response</span><span class="p">),</span> <span class="mi">200</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span>
|
||||
</span><span id="line-42"><span class="linenos">42</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
|
||||
</span><span id="line-43"><span class="linenos">43</span> <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -414,13 +434,13 @@ streamline data retrieval and processing to build more efficient and precise RAG
|
|||
<ul>
|
||||
<li><a :data-current="activeSection === '#intent-drift-detection'" class="reference internal" href="#intent-drift-detection">Intent-drift Detection</a><ul>
|
||||
<li><a :data-current="activeSection === '#step-1-define-conversationbuffermemory'" class="reference internal" href="#step-1-define-conversationbuffermemory">Step 1: Define ConversationBufferMemory</a></li>
|
||||
<li><a :data-current="activeSection === '#step-2-update-conversationbuffermemory-w-intent'" class="reference internal" href="#step-2-update-conversationbuffermemory-w-intent">Step 2: Update ConversationBufferMemory w/ intent</a></li>
|
||||
<li><a :data-current="activeSection === '#step-2-update-conversationbuffermemory-with-intents'" class="reference internal" href="#step-2-update-conversationbuffermemory-with-intents">Step 2: Update ConversationBufferMemory with Intents</a></li>
|
||||
<li><a :data-current="activeSection === '#step-3-get-messages-based-on-latest-drift'" class="reference internal" href="#step-3-get-messages-based-on-latest-drift">Step 3: Get Messages based on latest drift</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a :data-current="activeSection === '#parameter-extraction-for-rag'" class="reference internal" href="#parameter-extraction-for-rag">Parameter Extraction for RAG</a><ul>
|
||||
<li><a :data-current="activeSection === '#step-1-define-prompt-targets-with-parameter-definitions'" class="reference internal" href="#step-1-define-prompt-targets-with-parameter-definitions">Step 1: Define prompt targets with parameter definitions</a></li>
|
||||
<li><a :data-current="activeSection === '#step-2-process-request-parameters-in-flask'" class="reference internal" href="#step-2-process-request-parameters-in-flask">Step 2: Process request parameters in Flask</a></li>
|
||||
<li><a :data-current="activeSection === '#step-1-define-prompt-targets'" class="reference internal" href="#step-1-define-prompt-targets">Step 1: Define Prompt Targets</a></li>
|
||||
<li><a :data-current="activeSection === '#step-2-process-request-parameters-in-flask'" class="reference internal" href="#step-2-process-request-parameters-in-flask">Step 2: Process Request Parameters in Flask</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@
|
|||
<link href="../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../search.html" rel="search" title="Search"/>
|
||||
<link href="prompt_target.html" rel="next" title="Prompt Target"/>
|
||||
<link href="tech_overview/request_lifecycle.html" rel="prev" title="Request Lifecycle"/>
|
||||
<link href="tech_overview/error_target.html" rel="prev" title="Error Target"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="#">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -153,7 +153,7 @@
|
|||
<div id="content" role="main">
|
||||
<section id="llm-provider">
|
||||
<span id="id1"></span><h1>LLM Provider<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#llm-provider"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">llm_provider</span></code> is a top-level primitive in Arch, helping developers centrally define, secure, observe,
|
||||
<p><strong>LLM provider</strong> is a top-level primitive in Arch, helping developers centrally define, secure, observe,
|
||||
and manage the usage of of their LLMs. Arch builds on Envoy’s reliable <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/upstream/cluster_manager" rel="nofollow noopener">cluster subsystem<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>
|
||||
to manage egress traffic to LLMs, which includes intelligent routing, retry and fail-over mechanisms,
|
||||
ensuring high availability and fault tolerance. This abstraction also enables developers to seamlessly
|
||||
|
|
@ -162,7 +162,7 @@ across applications.</p>
|
|||
<p>Below is an example of how you can configure <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> with an instance of an Arch gateway.</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
|
|
@ -172,16 +172,15 @@ across applications.</p>
|
|||
</span><span id="line-8"><span class="linenos"> 8</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><mark><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</mark></span><span id="line-11"><mark><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</mark></span><span id="line-12"><mark><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</mark></span><span id="line-13"><mark><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</mark></span><span id="line-11"><mark><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</mark></span><span id="line-12"><mark><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</mark></span><span id="line-13"><mark><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</mark></span><span id="line-14"><mark><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</mark></span><span id="line-15"><mark><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</mark></span><span id="line-16"><mark><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</mark></span><span id="line-17"><span class="linenos">17</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -219,11 +218,11 @@ make outbound LLM calls.</p>
|
|||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="tech_overview/request_lifecycle.html">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="tech_overview/error_target.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Request Lifecycle
|
||||
Error Target
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -152,7 +152,7 @@
|
|||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="prompt-target">
|
||||
<h1>Prompt Target<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt-target"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<span id="id1"></span><h1>Prompt Target<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt-target"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p><strong>Prompt Targets</strong> are a fundamental component of Arch, enabling developers to define how different types of user prompts are processed and routed within their generative AI applications.
|
||||
This section provides an in-depth look at prompt targets, including their purpose, configuration, usage, and best practices to help you effectively leverage this feature in your projects.</p>
|
||||
<section id="what-are-prompt-targets">
|
||||
|
|
@ -263,12 +263,13 @@ Here is a full list of parameter attributes that Arch can support:</p>
|
|||
</span><span id="line-7"> <span class="nb">type</span><span class="p">:</span> <span class="nb">str</span>
|
||||
</span><span id="line-8"> <span class="n">required</span><span class="p">:</span> <span class="n">true</span>
|
||||
</span><span id="line-9"> <span class="o">-</span> <span class="n">name</span><span class="p">:</span> <span class="n">unit</span>
|
||||
</span><span id="line-10"> <span class="n">description</span><span class="p">:</span> <span class="n">The</span> <span class="n">unit</span> <span class="n">of</span> <span class="n">temperature</span> <span class="n">to</span> <span class="k">return</span>
|
||||
</span><span id="line-10"> <span class="n">description</span><span class="p">:</span> <span class="n">The</span> <span class="n">unit</span> <span class="n">of</span> <span class="n">temperature</span>
|
||||
</span><span id="line-11"> <span class="nb">type</span><span class="p">:</span> <span class="nb">str</span>
|
||||
</span><span id="line-12"> <span class="n">enum</span><span class="p">:</span> <span class="p">[</span><span class="s2">"celsius"</span><span class="p">,</span> <span class="s2">"fahrenheit"</span><span class="p">]</span>
|
||||
</span><span id="line-13"> <span class="n">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-14"> <span class="n">name</span><span class="p">:</span> <span class="n">api_server</span>
|
||||
</span><span id="line-15"> <span class="n">path</span><span class="p">:</span> <span class="o">/</span><span class="n">weather</span>
|
||||
</span><span id="line-12"> <span class="n">default</span><span class="p">:</span> <span class="n">fahrenheit</span>
|
||||
</span><span id="line-13"> <span class="n">enum</span><span class="p">:</span> <span class="p">[</span><span class="n">celsius</span><span class="p">,</span> <span class="n">fahrenheit</span><span class="p">]</span>
|
||||
</span><span id="line-14"> <span class="n">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-15"> <span class="n">name</span><span class="p">:</span> <span class="n">api_server</span>
|
||||
</span><span id="line-16"> <span class="n">path</span><span class="p">:</span> <span class="o">/</span><span class="n">weather</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
|
|
|||
|
|
@ -1,24 +1,25 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' && window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val => localStorage.setItem('darkMode', val))">
|
||||
<html :class="{'dark': darkMode === 'dark' || (darkMode === 'system' && window.matchMedia('(prefers-color-scheme: dark)').matches)}" class="scroll-smooth" data-content_root="../../" lang="en" x-data="{ darkMode: localStorage.getItem('darkMode') || localStorage.setItem('darkMode', 'system'), activeSection: '' }" x-init="$watch('darkMode', val => localStorage.setItem('darkMode', val))">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
|
||||
<meta charset="utf-8"/>
|
||||
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
|
||||
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport"/>
|
||||
<title>Error Targets | Arch Docs v0.1</title>
|
||||
<meta content="Error Targets | Arch Docs v0.1" property="og:title"/>
|
||||
<meta content="Error Targets | Arch Docs v0.1" name="twitter:title"/>
|
||||
<link href="../_static/pygments.css?v=75ebff74" rel="stylesheet" type="text/css"/>
|
||||
<link href="../_static/theme.css?v=edd7d3d2" rel="stylesheet" type="text/css"/>
|
||||
<link href="../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||||
<link href="../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
|
||||
<link href="../_static/awesome-sphinx-design.css?v=a54cf077" rel="stylesheet" type="text/css"/>
|
||||
<link href="./docs/resources/error_target.html" rel="canonical"/>
|
||||
<link href="../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../search.html" rel="search" title="Search"/>
|
||||
<link href="configuration_reference.html" rel="prev" title="Configuration Reference"/>
|
||||
<title>Error Target | Arch Docs v0.1</title>
|
||||
<meta content="Error Target | Arch Docs v0.1" property="og:title"/>
|
||||
<meta content="Error Target | Arch Docs v0.1" name="twitter:title"/>
|
||||
<link href="../../_static/pygments.css?v=75ebff74" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/theme.css?v=edd7d3d2" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/sphinx-design.min.css?v=95c83b7e" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/awesome-sphinx-design.css?v=a54cf077" rel="stylesheet" type="text/css"/>
|
||||
<link href="./docs/concepts/tech_overview/error_target.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="../llm_provider.html" rel="next" title="LLM Provider"/>
|
||||
<link href="request_lifecycle.html" rel="prev" title="Request Lifecycle"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
|
|
@ -37,8 +38,8 @@
|
|||
Skip to content
|
||||
</a><header class="sticky top-0 z-40 w-full border-b shadow-sm border-border supports-backdrop-blur:bg-background/60 bg-background/95 backdrop-blur"><div class="container flex items-center h-14">
|
||||
<div class="hidden mr-4 md:flex">
|
||||
<a class="flex items-center mr-6" href="../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.1</span>
|
||||
<a class="flex items-center mr-6" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="24" src="../../_static/favicon.ico" width="24"/><span class="hidden font-bold sm:inline-block text-clip whitespace-nowrap">Arch Docs v0.1</span>
|
||||
</a></div><button @click="showSidebar = true" class="inline-flex items-center justify-center h-10 px-0 py-2 mr-2 text-base font-medium transition-colors rounded-md hover:text-accent-foreground hover:bg-transparent md:hidden" type="button">
|
||||
<svg aria-hidden="true" fill="currentColor" height="24" viewbox="0 96 960 960" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M152.587 825.087q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440Zm0-203.587q-19.152 0-32.326-13.174T107.087 576q0-19.152 13.174-32.326t32.326-13.174h320q19.152 0 32.326 13.174T518.087 576q0 19.152-13.174 32.326T472.587 621.5h-320Zm0-203.587q-19.152 0-32.326-13.174t-13.174-32.326q0-19.152 13.174-32.326t32.326-13.174h440q19.152 0 32.326 13.174t13.174 32.326q0 19.152-13.174 32.326t-32.326 13.174h-440ZM708.913 576l112.174 112.174q12.674 12.674 12.674 31.826t-12.674 31.826Q808.413 764.5 789.261 764.5t-31.826-12.674l-144-144Q600 594.391 600 576t13.435-31.826l144-144q12.674-12.674 31.826-12.674t31.826 12.674q12.674 12.674 12.674 31.826t-12.674 31.826L708.913 576Z"></path>
|
||||
|
|
@ -46,7 +47,7 @@
|
|||
<span class="sr-only">Toggle navigation menu</span>
|
||||
</button>
|
||||
<div class="flex items-center justify-between flex-1 space-x-2 sm:space-x-4 md:justify-end">
|
||||
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../search.html" class="relative flex items-center group" id="searchbox" method="get">
|
||||
<div class="flex-1 w-full md:w-auto md:flex-none"><form @keydown.k.window.meta="$refs.search.focus()" action="../../search.html" class="relative flex items-center group" id="searchbox" method="get">
|
||||
<input aria-label="Search the docs" class="inline-flex items-center font-medium transition-colors bg-transparent focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 ring-offset-background border border-input hover:bg-accent focus:bg-accent hover:text-accent-foreground focus:text-accent-foreground hover:placeholder-accent-foreground py-2 px-4 relative h-9 w-full justify-start rounded-[0.5rem] text-sm text-muted-foreground sm:pr-12 md:w-40 lg:w-64" id="search-input" name="q" placeholder="Search ..." type="search" x-ref="search"/>
|
||||
<kbd class="pointer-events-none absolute right-1.5 top-2 hidden h-5 select-none text-muted-foreground items-center gap-1 rounded border border-border bg-muted px-1.5 font-mono text-[10px] font-medium opacity-100 sm:flex group-hover:bg-accent group-hover:text-accent-foreground">
|
||||
<span class="text-xs">⌘</span>
|
||||
|
|
@ -73,8 +74,8 @@
|
|||
</div>
|
||||
</header>
|
||||
<div class="flex-1"><div class="container flex-1 items-start md:grid md:grid-cols-[220px_minmax(0,1fr)] md:gap-6 lg:grid-cols-[240px_minmax(0,1fr)] lg:gap-10"><aside :aria-hidden="!showSidebar" :class="{ 'translate-x-0': showSidebar }" class="fixed inset-y-0 left-0 md:top-14 z-50 md:z-30 bg-background md:bg-transparent transition-all duration-100 -translate-x-full md:translate-x-0 ml-0 p-6 md:p-0 md:-ml-2 md:h-[calc(100vh-3.5rem)] w-5/6 md:w-full shrink-0 overflow-y-auto border-r border-border md:sticky" id="left-sidebar">
|
||||
<a class="!justify-start text-sm md:!hidden bg-background" href="../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.1</span>
|
||||
<a class="!justify-start text-sm md:!hidden bg-background" href="../../index.html">
|
||||
<img alt="Logo" class="mr-2 dark:invert" height="16" src="../../_static/favicon.ico" width="16"/><span class="font-bold text-clip whitespace-nowrap">Arch Docs v0.1</span>
|
||||
</a>
|
||||
<div class="relative overflow-hidden md:overflow-auto my-4 md:my-0 h-[calc(100vh-8rem)] md:h-auto">
|
||||
<div class="overflow-y-auto h-full w-full relative pr-6">
|
||||
|
|
@ -90,44 +91,44 @@
|
|||
<nav class="table w-full min-w-full my-6 lg:my-8">
|
||||
<p class="caption" role="heading"><span class="caption-text">Get Started</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../get_started/overview.html">Overview</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../get_started/intro_to_arch.html">Intro to Arch</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../get_started/quickstart.html">Quickstart</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/overview.html">Overview</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/intro_to_arch.html">Intro to Arch</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../get_started/quickstart.html">Quickstart</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Concepts</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../concepts/tech_overview/tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1 current" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="tech_overview.html">Tech Overview<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul class="current" x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/prompt_target.html">Prompt Target</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../prompt_target.html">Prompt Target</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../guides/prompt_guard.html">Prompt Guard</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../guides/function_calling.html">Function Calling</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../guides/observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../guides/observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../guides/observability/monitoring.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../guides/observability/access_logging.html">Access Logging</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/prompt_guard.html">Prompt Guard</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../guides/function_calling.html">Function Calling</a></li>
|
||||
<li class="toctree-l1" x-data="{ expanded: $el.classList.contains('current') ? true : false }"><a :class="{ 'expanded' : expanded }" @click="expanded = !expanded" class="reference internal expandable" href="../../guides/observability/observability.html">Observability<button @click.prevent.stop="expanded = !expanded" type="button"><span class="sr-only"></span><svg fill="currentColor" height="18px" stroke="none" viewbox="0 0 24 24" width="18px" xmlns="http://www.w3.org/2000/svg"><path d="M10 6L8.59 7.41 13.17 12l-4.58 4.59L10 18l6-6z"></path></svg></button></a><ul x-show="expanded">
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/tracing.html">Tracing</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/monitoring.html">Monitoring</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../guides/observability/access_logging.html">Access Logging</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Build with Arch</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/agent.html">Agentic Workflow</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../build_with_arch/rag.html">RAG Application</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/agent.html">Agentic Workflow</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../build_with_arch/rag.html">RAG Application</a></li>
|
||||
</ul>
|
||||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="#">Error Targets</a></li>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -141,52 +142,53 @@
|
|||
<main class="relative py-6 lg:gap-10 lg:py-8 xl:grid xl:grid-cols-[1fr_300px]">
|
||||
<div class="w-full min-w-0 mx-auto">
|
||||
<nav aria-label="breadcrumbs" class="flex items-center mb-4 space-x-1 text-sm text-muted-foreground">
|
||||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../index.html">
|
||||
<a class="overflow-hidden text-ellipsis whitespace-nowrap hover:text-foreground" href="../../index.html">
|
||||
<span class="hidden md:inline">Arch Docs v0.1</span>
|
||||
<svg aria-label="Home" class="md:hidden" fill="currentColor" height="18" stroke="none" viewbox="0 96 960 960" width="18" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M240 856h120V616h240v240h120V496L480 316 240 496v360Zm-80 80V456l320-240 320 240v480H520V696h-80v240H160Zm320-350Z"></path>
|
||||
</svg>
|
||||
</a>
|
||||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Error Targets</span>
|
||||
<div class="mr-1">/</div><a class="hover:text-foreground overflow-hidden text-ellipsis whitespace-nowrap" href="tech_overview.html">Tech Overview</a>
|
||||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Error Target</span>
|
||||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="error-targets">
|
||||
<span id="error-target"></span><h1>Error Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#error-targets"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<section id="error-target">
|
||||
<span id="id1"></span><h1>Error Target<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#error-target"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p><strong>Error targets</strong> are designed to capture and manage specific issues or exceptions that occur during Arch’s function or system’s execution.</p>
|
||||
<p>These endpoints receive errors forwarded from Arch when issues arise, such as improper function/API calls, guardrail violations, or other processing errors.
|
||||
The errors are communicated to the application via headers like <code class="docutils literal notranslate"><span class="pre">X-Arch-[ERROR-TYPE]</span></code>, enabling you to respond appropriately and handle errors gracefully.</p>
|
||||
<section id="key-concepts">
|
||||
<h2>Key Concepts<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#key-concepts" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#key-concepts'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p><strong>Error Type</strong>: Categorizes the nature of the error, such as “ValidationError” or “RuntimeError.” These error types help in identifying what
|
||||
kind of issue occurred and provide context for troubleshooting.</p>
|
||||
<p><strong>Error Message</strong>: A clear, human-readable message describing the error. This should provide enough detail to inform users or developers of
|
||||
the root cause or required action.</p>
|
||||
<dl class="simple">
|
||||
<dt><strong>Target Prompt</strong>: The specific prompt or operation where the error occurred. Understanding where the error happened helps with debugging</dt><dd><p>and pinpointing the source of the problem.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<p><strong>Parameter-Specific Errors</strong>: Errors that arise due to invalid or missing parameters when invoking a function. These errors are critical
|
||||
for ensuring the correctness of inputs.</p>
|
||||
<ul class="simple">
|
||||
<li><p><strong>Error Type</strong>: Categorizes the nature of the error, such as “ValidationError” or “RuntimeError.” These error types help in identifying what kind of issue occurred and provide context for troubleshooting.</p></li>
|
||||
<li><p><strong>Error Message</strong>: A clear, human-readable message describing the error. This should provide enough detail to inform users or developers of the root cause or required action.</p></li>
|
||||
<li><p><strong>Target Prompt</strong>: The specific prompt or operation where the error occurred. Understanding where the error happened helps with debugging and pinpointing the source of the problem.</p></li>
|
||||
<li><p><strong>Parameter-Specific Errors</strong>: Errors that arise due to invalid or missing parameters when invoking a function. These errors are critical for ensuring the correctness of inputs.</p></li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="error-header-example">
|
||||
<h2>Error Header Example<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#error-header-example" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#error-header-example'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<div class="highlight-http notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kr">HTTP</span><span class="o">/</span><span class="m">1.1</span> <span class="m">400</span> <span class="ne">Bad Request</span>
|
||||
</span><span id="line-2"><span class="na">X-Arch-Error-Type</span><span class="o">:</span> <span class="l">FunctionValidationError</span>
|
||||
</span><span id="line-3"><span class="na">X-Arch-Error-Message</span><span class="o">:</span> <span class="l">Tools call parsing failure</span>
|
||||
</span><span id="line-4"><span class="na">X-Arch-Target-Prompt</span><span class="o">:</span> <span class="l">createUser</span>
|
||||
</span><span id="line-5"><span class="na">Content-Type</span><span class="o">:</span> <span class="l">application/json</span>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Error Header Example</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="w"> </span>HTTP/1.1<span class="w"> </span><span class="m">400</span><span class="w"> </span>Bad<span class="w"> </span>Request
|
||||
</span><span id="line-2"><span class="w"> </span>X-Arch-Error-Type:<span class="w"> </span>FunctionValidationError
|
||||
</span><span id="line-3"><span class="w"> </span>X-Arch-Error-Message:<span class="w"> </span>Tools<span class="w"> </span>call<span class="w"> </span>parsing<span class="w"> </span>failure
|
||||
</span><span id="line-4"><span class="w"> </span>X-Arch-Target-Prompt:<span class="w"> </span>createUser
|
||||
</span><span id="line-5"><span class="w"> </span>Content-Type:<span class="w"> </span>application/json
|
||||
</span><span id="line-6">
|
||||
</span><span id="line-7"><span class="nt">"messages"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||||
</span><span id="line-8"><span class="p">{</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="nt">"role"</span><span class="p">:</span><span class="w"> </span><span class="s2">"user"</span><span class="p">,</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="nt">"content"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Please create a user with the following ID: 1234"</span>
|
||||
</span><span id="line-11"><span class="p">},</span>
|
||||
</span><span id="line-12"><span class="p">{</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">"role"</span><span class="p">:</span><span class="w"> </span><span class="s2">"system"</span><span class="p">,</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">"content"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Expected a string for 'user_id', but got an integer."</span>
|
||||
</span><span id="line-15"><span class="p">}]</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="s2">"messages"</span>:<span class="w"> </span><span class="o">[</span>
|
||||
</span><span id="line-8"><span class="w"> </span><span class="o">{</span>
|
||||
</span><span id="line-9"><span class="w"> </span><span class="s2">"role"</span>:<span class="w"> </span><span class="s2">"user"</span>,
|
||||
</span><span id="line-10"><span class="w"> </span><span class="s2">"content"</span>:<span class="w"> </span><span class="s2">"Please create a user with the following ID: 1234"</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="o">}</span>,
|
||||
</span><span id="line-12"><span class="w"> </span><span class="o">{</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="s2">"role"</span>:<span class="w"> </span><span class="s2">"system"</span>,
|
||||
</span><span id="line-14"><span class="w"> </span><span class="s2">"content"</span>:<span class="w"> </span><span class="s2">"Expected a string for 'user_id', but got an integer."</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="o">}</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="o">]</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="best-practices-and-tips">
|
||||
<h2>Best Practices and Tips<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#best-practices-and-tips" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#best-practices-and-tips'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
|
|
@ -199,13 +201,21 @@ for ensuring the correctness of inputs.</p>
|
|||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="configuration_reference.html">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="request_lifecycle.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Configuration Reference
|
||||
Request Lifecycle
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="../llm_provider.html">
|
||||
LLM Provider
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</div></div><aside class="hidden text-sm xl:block" id="right-sidebar">
|
||||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
|
|
@ -225,10 +235,10 @@ for ensuring the correctness of inputs.</p>
|
|||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
<script src="../_static/documentation_options.js?v=a0703c7e"></script>
|
||||
<script src="../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script defer="defer" src="../_static/theme.js?v=1808ab49"></script>
|
||||
<script src="../_static/design-tabs.js?v=f930bc37"></script>
|
||||
<script src="../../_static/documentation_options.js?v=a0703c7e"></script>
|
||||
<script src="../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script defer="defer" src="../../_static/theme.js?v=1808ab49"></script>
|
||||
<script src="../../_static/design-tabs.js?v=f930bc37"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -18,7 +18,7 @@
|
|||
<link href="./docs/concepts/tech_overview/listener.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="model_serving.html" rel="next" title="Model Serving"/>
|
||||
<link href="prompt.html" rel="next" title="Prompts"/>
|
||||
<link href="threading_model.html" rel="prev" title="Threading Model"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -154,7 +154,7 @@
|
|||
<div id="content" role="main">
|
||||
<section id="listener">
|
||||
<span id="arch-overview-listeners"></span><h1>Listener<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#listener"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>Listener is a top level primitive in Arch, which simplifies the configuration required to bind incoming
|
||||
<p><strong>Listener</strong> is a top level primitive in Arch, which simplifies the configuration required to bind incoming
|
||||
connections from downstream clients, and for egress connections to LLMs (hosted or API)</p>
|
||||
<p>Arch builds on Envoy’s Listener subsystem to streamline connection managemet for developers. Arch minimizes
|
||||
the complexity of Envoy’s listener setup by using best-practices and exposing only essential settings,
|
||||
|
|
@ -165,33 +165,33 @@ simplification ensures that connections are secure, reliable, and optimized for
|
|||
<p>Developers can configure Arch to accept connections from downstream clients. A downstream listener acts as the
|
||||
primary entry point for incoming traffic, handling initial connection setup, including network filtering, gurdrails,
|
||||
and additional network security checks. For more details on prompt security and safety,
|
||||
see <a class="reference internal" href="prompt.html#arch-overview-prompt-handling"><span class="std std-ref">here</span></a></p>
|
||||
see <a class="reference internal" href="prompt.html#arch-overview-prompt-handling"><span class="std std-ref">here</span></a>.</p>
|
||||
</section>
|
||||
<section id="upstream-egress">
|
||||
<h2>Upstream (Egress)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#upstream-egress" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#upstream-egress'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>Arch automatically configures a listener to route requests from your application to upstream LLM API providers (or hosts).
|
||||
When you start Arch, it creates a listener for egress traffic based on the presence of the <code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> configuration
|
||||
section in the <code class="docutils literal notranslate"><span class="pre">prompt_config.yml</span></code> file. Arch binds itself to a local address such as <code class="docutils literal notranslate"><span class="pre">127.0.0.1:9000/v1</span></code> or a DNS-based
|
||||
address like <code class="docutils literal notranslate"><span class="pre">arch.local:9000/v1</span></code> for outgoing traffic. For more details on LLM providers, read <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">here</span></a></p>
|
||||
When you start Arch, it creates a listener for egress traffic based on the presence of the <code class="docutils literal notranslate"><span class="pre">listener</span></code> configuration
|
||||
section in the configuration file. Arch binds itself to a local address such as <code class="docutils literal notranslate"><span class="pre">127.0.0.1:9000/v1</span></code> or a DNS-based
|
||||
address like <code class="docutils literal notranslate"><span class="pre">arch.local:9000/v1</span></code> for outgoing traffic. For more details on LLM providers, read <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">here</span></a>.</p>
|
||||
</section>
|
||||
<section id="configure-listener">
|
||||
<h2>Configure Listener<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#configure-listener" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#configure-listener'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>To configure a Downstream (Ingress) Listner, simply add the <code class="docutils literal notranslate"><span class="pre">listener</span></code> directive to your <code class="docutils literal notranslate"><span class="pre">prompt_config.yml</span></code> file:</p>
|
||||
<p>To configure a Downstream (Ingress) Listner, simply add the <code class="docutils literal notranslate"><span class="pre">listener</span></code> directive to your configuration file:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id1">
|
||||
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
</span><span id="line-2"><mark><span class="linenos"> 2</span>
|
||||
</mark></span><span id="line-3"><mark><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><mark><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
|
||||
</mark></span><span id="line-4"><mark><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
</mark></span><span id="line-5"><mark><span class="linenos"> 5</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
|
||||
</mark></span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/pain Content-type in the http request</span>
|
||||
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
|
||||
</span><span id="line-8"><span class="linenos"> 8</span>
|
||||
</mark></span><span id="line-6"><mark><span class="linenos"> 6</span><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/pain Content-type in the http request</span>
|
||||
</mark></span><span id="line-7"><mark><span class="linenos"> 7</span><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
|
||||
</mark></span><span id="line-8"><span class="linenos"> 8</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
|
|
@ -212,8 +212,8 @@ address like <code class="docutils literal notranslate"><span class="pre">arch.l
|
|||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="model_serving.html">
|
||||
Model Serving
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="prompt.html">
|
||||
Prompts
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
|
|
|
|||
|
|
@ -18,8 +18,8 @@
|
|||
<link href="./docs/concepts/tech_overview/model_serving.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="prompt.html" rel="next" title="Prompt"/>
|
||||
<link href="listener.html" rel="prev" title="Listener"/>
|
||||
<link href="request_lifecycle.html" rel="next" title="Request Lifecycle"/>
|
||||
<link href="prompt.html" rel="prev" title="Prompts"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -153,15 +153,14 @@
|
|||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="model-serving">
|
||||
<span id="arch-model-serving"></span><h1>Model Serving<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-serving"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>Arch is a set of <strong>two</strong> self-contained processes that are designed to run alongside your application
|
||||
<span id="id1"></span><h1>Model Serving<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#model-serving"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>Arch is a set of <cite>two</cite> self-contained processes that are designed to run alongside your application
|
||||
servers (or on a separate host connected via a network). The first process is designated to manage low-level
|
||||
networking and HTTP related comcerns, and the other process is for <strong>model serving</strong>, which helps Arch make
|
||||
networking and HTTP related comcerns, and the other process is for model serving, which helps Arch make
|
||||
intelligent decisions about the incoming prompts. The model server is designed to call the purpose-built
|
||||
LLMs in Arch.</p>
|
||||
<a class="reference internal image-reference" href="../../_images/arch-system-architecture.jpg"><img alt="../../_images/arch-system-architecture.jpg" class="align-center" src="../../_images/arch-system-architecture.jpg" style="width: 50%;"/>
|
||||
<a class="reference internal image-reference" href="../../_images/arch-system-architecture.jpg"><img alt="../../_images/arch-system-architecture.jpg" class="align-center" src="../../_images/arch-system-architecture.jpg" style="width: 40%;"/>
|
||||
</a>
|
||||
<hr class="docutils"/>
|
||||
<p>Arch’ is designed to be deployed in your cloud VPC, on a on-premises host, and can work on devices that don’t
|
||||
have a GPU. Note, GPU devices are need for fast and cost-efficient use, so that Arch (model server, specifically)
|
||||
can process prompts quickly and forward control back to the applicaton host. There are three modes in which Arch
|
||||
|
|
@ -176,11 +175,11 @@ might not be available.</p>
|
|||
</div>
|
||||
</section>
|
||||
<section id="local-serving-gpu-fast">
|
||||
<h2>Local Serving (GPU- Fast)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#local-serving-gpu-fast" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#local-serving-gpu-fast'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<h2>Local Serving (GPU - Fast)<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#local-serving-gpu-fast" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#local-serving-gpu-fast'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>The following bash commands enable you to configure the model server subsystem in Arch to run locally on the
|
||||
machine and utilize the GPU available for fast inference across all model use cases, including function calling
|
||||
guardails, etc.</p>
|
||||
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="gp">$ </span>archgw<span class="w"> </span>up<span class="w"> </span>--local
|
||||
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="gp">$ </span>archgw<span class="w"> </span>up<span class="w"> </span>--local-gpu
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
|
@ -202,16 +201,16 @@ how to generate API keys for model serving</p>
|
|||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="listener.html">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="prompt.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Listener
|
||||
Prompts
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="prompt.html">
|
||||
Prompt
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="request_lifecycle.html">
|
||||
Request Lifecycle
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
|
|
@ -221,7 +220,7 @@ how to generate API keys for model serving</p>
|
|||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
<li><a :data-current="activeSection === '#local-serving-cpu-moderate'" class="reference internal" href="#local-serving-cpu-moderate">Local Serving (CPU - Moderate)</a></li>
|
||||
<li><a :data-current="activeSection === '#local-serving-gpu-fast'" class="reference internal" href="#local-serving-gpu-fast">Local Serving (GPU- Fast)</a></li>
|
||||
<li><a :data-current="activeSection === '#local-serving-gpu-fast'" class="reference internal" href="#local-serving-gpu-fast">Local Serving (GPU - Fast)</a></li>
|
||||
<li><a :data-current="activeSection === '#cloud-serving-gpu-blazing-fast'" class="reference internal" href="#cloud-serving-gpu-blazing-fast">Cloud Serving (GPU - Blazing Fast)</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@
|
|||
<meta content="white" media="(prefers-color-scheme: light)" name="theme-color"/>
|
||||
<meta content="black" media="(prefers-color-scheme: dark)" name="theme-color"/>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport"/>
|
||||
<title>Prompt | Arch Docs v0.1</title>
|
||||
<meta content="Prompt | Arch Docs v0.1" property="og:title"/>
|
||||
<meta content="Prompt | Arch Docs v0.1" name="twitter:title"/>
|
||||
<title>Prompts | Arch Docs v0.1</title>
|
||||
<meta content="Prompts | Arch Docs v0.1" property="og:title"/>
|
||||
<meta content="Prompts | Arch Docs v0.1" name="twitter:title"/>
|
||||
<link href="../../_static/pygments.css?v=75ebff74" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/theme.css?v=edd7d3d2" rel="stylesheet" type="text/css"/>
|
||||
<link href="../../_static/_static/custom.css" rel="stylesheet" type="text/css"/>
|
||||
|
|
@ -18,8 +18,8 @@
|
|||
<link href="./docs/concepts/tech_overview/prompt.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="request_lifecycle.html" rel="next" title="Request Lifecycle"/>
|
||||
<link href="model_serving.html" rel="prev" title="Model Serving"/>
|
||||
<link href="model_serving.html" rel="next" title="Model Serving"/>
|
||||
<link href="listener.html" rel="prev" title="Listener"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -149,20 +149,19 @@
|
|||
</svg>
|
||||
</a>
|
||||
<div class="mr-1">/</div><a class="hover:text-foreground overflow-hidden text-ellipsis whitespace-nowrap" href="tech_overview.html">Tech Overview</a>
|
||||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Prompt</span>
|
||||
<div class="mr-1">/</div><span aria-current="page" class="font-medium text-foreground overflow-hidden text-ellipsis whitespace-nowrap">Prompts</span>
|
||||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="prompt">
|
||||
<span id="arch-overview-prompt-handling"></span><h1>Prompt<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<section id="prompts">
|
||||
<span id="arch-overview-prompt-handling"></span><h1>Prompts<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompts"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>Arch’s primary design point is to securely accept, process and handle prompts. To do that effectively,
|
||||
Arch relies on Envoy’s HTTP <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/http/http_connection_management" rel="nofollow noopener">connection management<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>,
|
||||
subsystem and its <strong>prompt handler</strong> subsystem engineered with purpose-built LLMs to
|
||||
implement critical functionality on behalf of developers so that you can stay focused on business logic.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>Arch’s <strong>prompt handler</strong> subsystem interacts with the <strong>model</strong> subsytem through Envoy’s cluster manager
|
||||
system to ensure robust, resilient and fault-tolerant experience in managing incoming prompts. Read more
|
||||
about the <a class="reference internal" href="model_serving.html#arch-model-serving"><span class="std std-ref">model subsystem</span></a> and how the LLMs are hosted in Arch.</p>
|
||||
<p>Arch’s <strong>prompt handler</strong> subsystem interacts with the <strong>model subsytem</strong> through Envoy’s cluster manager system to ensure robust, resilient and fault-tolerant experience in managing incoming prompts.</p>
|
||||
<div class="admonition seealso">
|
||||
<p class="admonition-title">See also</p>
|
||||
<p>Read more about the <a class="reference internal" href="model_serving.html#model-serving"><span class="std std-ref">model subsystem</span></a> and how the LLMs are hosted in Arch.</p>
|
||||
</div>
|
||||
<section id="messages">
|
||||
<h2>Messages<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#messages" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#messages'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
|
|
@ -176,15 +175,15 @@ containing two key-value pairs:</p>
|
|||
</ul>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
<section id="prompt-guardrails">
|
||||
<h2>Prompt Guardrails<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt-guardrails" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#prompt-guardrails'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<section id="prompt-guard">
|
||||
<h2>Prompt Guard<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#prompt-guard" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#prompt-guard'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>Arch is engineered with <a class="reference internal" href="../../guides/prompt_guard.html#prompt-guard"><span class="std std-ref">Arch-Guard</span></a>, an industry leading safety layer, powered by a
|
||||
compact and high-performimg LLM that monitors incoming prompts to detect and reject jailbreak attempts -
|
||||
ensuring that unauthorized or harmful behaviors are intercepted early in the process.</p>
|
||||
<p>To add jailbreak guardrails, see example below:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id1">
|
||||
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
|
|
@ -194,46 +193,27 @@ ensuring that unauthorized or harmful behaviors are intercepted early in the pro
|
|||
</span><span id="line-8"><span class="linenos"> 8</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span>
|
||||
</span><span id="line-22"><mark><span class="linenos">22</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</mark></span><span id="line-23"><mark><span class="linenos">23</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</mark></span><span id="line-24"><mark><span class="linenos">24</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||||
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||||
</mark></span><span id="line-26"><mark><span class="linenos">26</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="s">"Looks</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">you're</span><span class="nv"> </span><span class="s">curious</span><span class="nv"> </span><span class="s">about</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">abilities,</span><span class="nv"> </span><span class="s">but</span><span class="nv"> </span><span class="s">I</span><span class="nv"> </span><span class="s">can</span><span class="nv"> </span><span class="s">only</span><span class="nv"> </span><span class="s">provide</span><span class="nv"> </span><span class="s">assistance</span><span class="nv"> </span><span class="s">within</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">programmed</span><span class="nv"> </span><span class="s">parameters."</span>
|
||||
</mark></span><span id="line-27"><span class="linenos">27</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"reboot_network_device"</span>
|
||||
</span><span id="line-30"><span class="linenos">30</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Helps</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">operators</span><span class="nv"> </span><span class="s">perform</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">operations</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">rebooting</span><span class="nv"> </span><span class="s">a</span><span class="nv"> </span><span class="s">device."</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-32"><span class="linenos">32</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</span><span id="line-33"><span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/action"</span>
|
||||
</span><span id="line-34"><span class="linenos">34</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span id="line-35"><span class="linenos">35</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_id"</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span><span class="w"> </span><span class="c1"># additional type options include: int | float | bool | string | list | dict</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Identifier</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</span><span id="line-39"><span class="linenos">39</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"confirmation"</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||||
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Confirmation</span><span class="nv"> </span><span class="s">flag</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">proceed</span><span class="nv"> </span><span class="s">with</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="s">"no"</span>
|
||||
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">yes</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">no</span><span class="p p-Indicator">]</span>
|
||||
</span></code></pre></div>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span>
|
||||
</span><span id="line-21"><mark><span class="linenos">21</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</mark></span><span id="line-22"><mark><span class="linenos">22</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</mark></span><span id="line-23"><mark><span class="linenos">23</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||||
</mark></span><span id="line-24"><mark><span class="linenos">24</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||||
</mark></span><span id="line-25"><mark><span class="linenos">25</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
|
||||
</mark></span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>As a roadmap item, Arch will expose the ability for developers to define custom guardrails via Arch-Guard-v2,
|
||||
<p>As a roadmap item, Arch will expose the ability for developers to define custom guardrails via Arch-Guard,
|
||||
and add support for additional safety checks defined by developers and hazardous categories like, violent crimes, privacy, hate,
|
||||
etc. To offer feedback on our roadmap, please visit our <a class="reference external" href="https://github.com/orgs/katanemo/projects/1" rel="nofollow noopener">github page<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a></p>
|
||||
</div>
|
||||
|
|
@ -247,7 +227,7 @@ when a user’s intent has changed so that you can build faster, more accurate R
|
|||
<p>Configuring <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> is simple. See example below:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id2">
|
||||
<div class="code-block-caption"><span class="caption-text">Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id2"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
|
|
@ -257,70 +237,71 @@ when a user’s intent has changed so that you can build faster, more accurate R
|
|||
</span><span id="line-8"><span class="linenos"> 8</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><span class="linenos">10</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-11"><span class="linenos">11</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-12"><span class="linenos">12</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-13"><span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-14"><span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-15"><span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-16"><span class="linenos">16</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-17"><span class="linenos">17</span>
|
||||
</span><span id="line-18"><span class="linenos">18</span><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span>
|
||||
</span><span id="line-22"><span class="linenos">22</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</span><span id="line-23"><span class="linenos">23</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</span><span id="line-24"><span class="linenos">24</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||||
</span><span id="line-25"><span class="linenos">25</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="s">"Looks</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">you're</span><span class="nv"> </span><span class="s">curious</span><span class="nv"> </span><span class="s">about</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">abilities,</span><span class="nv"> </span><span class="s">but</span><span class="nv"> </span><span class="s">I</span><span class="nv"> </span><span class="s">can</span><span class="nv"> </span><span class="s">only</span><span class="nv"> </span><span class="s">provide</span><span class="nv"> </span><span class="s">assistance</span><span class="nv"> </span><span class="s">within</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">programmed</span><span class="nv"> </span><span class="s">parameters."</span>
|
||||
</span><span id="line-27"><span class="linenos">27</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-29"><mark><span class="linenos">29</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"reboot_network_device"</span>
|
||||
</mark></span><span id="line-30"><mark><span class="linenos">30</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Helps</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">operators</span><span class="nv"> </span><span class="s">perform</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">operations</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">rebooting</span><span class="nv"> </span><span class="s">a</span><span class="nv"> </span><span class="s">device."</span>
|
||||
</mark></span><span id="line-31"><mark><span class="linenos">31</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</mark></span><span id="line-32"><mark><span class="linenos">32</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</mark></span><span id="line-33"><mark><span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/action"</span>
|
||||
</mark></span><span id="line-34"><mark><span class="linenos">34</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</mark></span><span id="line-35"><mark><span class="linenos">35</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_id"</span>
|
||||
</mark></span><span id="line-36"><mark><span class="linenos">36</span><span class="w"> </span><span class="c1"># additional type options include: int | float | bool | string | list | dict</span>
|
||||
</mark></span><span id="line-37"><mark><span class="linenos">37</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||||
</mark></span><span id="line-38"><mark><span class="linenos">38</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Identifier</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</mark></span><span id="line-39"><span class="linenos">39</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-40"><span class="linenos">40</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"confirmation"</span>
|
||||
</span><span id="line-41"><span class="linenos">41</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||||
</span><span id="line-42"><span class="linenos">42</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Confirmation</span><span class="nv"> </span><span class="s">flag</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">proceed</span><span class="nv"> </span><span class="s">with</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</span><span id="line-43"><span class="linenos">43</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="s">"no"</span>
|
||||
</span><span id="line-44"><span class="linenos">44</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">yes</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">no</span><span class="p p-Indicator">]</span>
|
||||
</span><span id="line-45"><span class="linenos">45</span>
|
||||
</span><span id="line-46"><span class="linenos">46</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"information_extraction"</span>
|
||||
</span><span id="line-47"><span class="linenos">47</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-48"><span class="linenos">48</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"This</span><span class="nv"> </span><span class="s">prompt</span><span class="nv"> </span><span class="s">handles</span><span class="nv"> </span><span class="s">all</span><span class="nv"> </span><span class="s">scenarios</span><span class="nv"> </span><span class="s">that</span><span class="nv"> </span><span class="s">are</span><span class="nv"> </span><span class="s">question</span><span class="nv"> </span><span class="s">and</span><span class="nv"> </span><span class="s">answer</span><span class="nv"> </span><span class="s">in</span><span class="nv"> </span><span class="s">nature.</span><span class="nv"> </span><span class="s">Like</span><span class="nv"> </span><span class="s">summarization,</span><span class="nv"> </span><span class="s">information</span><span class="nv"> </span><span class="s">extraction,</span><span class="nv"> </span><span class="s">etc."</span>
|
||||
</span><span id="line-49"><span class="linenos">49</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-50"><span class="linenos">50</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</span><span id="line-51"><span class="linenos">51</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/summary"</span>
|
||||
</span><span id="line-52"><span class="linenos">52</span><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
|
||||
</span><span id="line-53"><span class="linenos">53</span><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-54"><span class="linenos">54</span><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
|
||||
</span><span id="line-55"><span class="linenos">55</span><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-56"><span class="linenos">56</span><span class="w"> </span><span class="no">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
|
||||
</span><span id="line-57"><span class="linenos">57</span>
|
||||
</span><span id="line-58"><span class="linenos">58</span><span class="nt">error_target</span><span class="p">:</span>
|
||||
</span><span id="line-59"><span class="linenos">59</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-60"><span class="linenos">60</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
|
||||
</span><span id="line-61"><span class="linenos">61</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
|
||||
</span><span id="line-62"><span class="linenos">62</span>
|
||||
</span><span id="line-63"><span class="linenos">63</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-64"><span class="linenos">64</span><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-65"><span class="linenos">65</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-66"><span class="linenos">66</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-67"><span class="linenos">67</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-68"><span class="linenos">68</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-69"><span class="linenos">69</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
|
||||
</span><span id="line-70"><span class="linenos">70</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-71"><span class="linenos">71</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span><span id="line-19"><span class="linenos">19</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-20"><span class="linenos">20</span>
|
||||
</span><span id="line-21"><span class="linenos">21</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</span><span id="line-22"><span class="linenos">22</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</span><span id="line-23"><span class="linenos">23</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||||
</span><span id="line-24"><span class="linenos">24</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||||
</span><span id="line-25"><span class="linenos">25</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
|
||||
</span><span id="line-26"><span class="linenos">26</span>
|
||||
</span><span id="line-27"><span class="linenos">27</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-28"><span class="linenos">28</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
|
||||
</span><span id="line-29"><span class="linenos">29</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-30"><span class="linenos">30</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">handel all scenarios that are question and answer in nature. Like summarization, information extraction, etc.</span>
|
||||
</span><span id="line-31"><span class="linenos">31</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-32"><span class="linenos">32</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</span><span id="line-33"><span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
|
||||
</span><span id="line-34"><span class="linenos">34</span><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
|
||||
</span><span id="line-35"><span class="linenos">35</span><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-36"><span class="linenos">36</span><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
|
||||
</span><span id="line-37"><span class="linenos">37</span><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
|
||||
</span><span id="line-38"><span class="linenos">38</span>
|
||||
</span><span id="line-39"><mark><span class="linenos">39</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
|
||||
</mark></span><span id="line-40"><mark><span class="linenos">40</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot a specific network device</span>
|
||||
</mark></span><span id="line-41"><mark><span class="linenos">41</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</mark></span><span id="line-42"><mark><span class="linenos">42</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</mark></span><span id="line-43"><mark><span class="linenos">43</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
|
||||
</mark></span><span id="line-44"><mark><span class="linenos">44</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</mark></span><span id="line-45"><mark><span class="linenos">45</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_id</span>
|
||||
</mark></span><span id="line-46"><mark><span class="linenos">46</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
|
||||
</mark></span><span id="line-47"><mark><span class="linenos">47</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Identifier of the network device to reboot.</span>
|
||||
</mark></span><span id="line-48"><mark><span class="linenos">48</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</mark></span><span id="line-49"><mark><span class="linenos">49</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">confirmation</span>
|
||||
</mark></span><span id="line-50"><mark><span class="linenos">50</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">bool</span>
|
||||
</mark></span><span id="line-51"><mark><span class="linenos">51</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Confirmation flag to proceed with reboot.</span>
|
||||
</mark></span><span id="line-52"><mark><span class="linenos">52</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</mark></span><span id="line-53"><mark><span class="linenos">53</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">true</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">false</span><span class="p p-Indicator">]</span>
|
||||
</mark></span><span id="line-54"><span class="linenos">54</span>
|
||||
</span><span id="line-55"><span class="linenos">55</span><span class="nt">error_target</span><span class="p">:</span>
|
||||
</span><span id="line-56"><span class="linenos">56</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-57"><span class="linenos">57</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
|
||||
</span><span id="line-58"><span class="linenos">58</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
|
||||
</span><span id="line-59"><span class="linenos">59</span>
|
||||
</span><span id="line-60"><span class="linenos">60</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-61"><span class="linenos">61</span><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-62"><span class="linenos">62</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-63"><span class="linenos">63</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-64"><span class="linenos">64</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-65"><span class="linenos">65</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-66"><span class="linenos">66</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
|
||||
</span><span id="line-67"><span class="linenos">67</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-68"><span class="linenos">68</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="admonition seealso">
|
||||
<p class="admonition-title">See also</p>
|
||||
<p>Check <a class="reference internal" href="../prompt_target.html#prompt-target"><span class="std std-ref">Prompt Target</span></a> for more details!</p>
|
||||
</div>
|
||||
<section id="intent-detection-and-prompt-matching">
|
||||
<h3>Intent Detection and Prompt Matching:<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#intent-detection-and-prompt-matching" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#intent-detection-and-prompt-matching'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Arch uses fast Natural Language Inference (NLI) and embedding approaches to first detect the intent of each
|
||||
|
|
@ -382,28 +363,24 @@ traffic, apply rate limits, and utilize a large set of traffic management capabi
|
|||
</span><span id="line-14"><span class="nb">print</span><span class="p">(</span><span class="s2">"OpenAI Response:"</span><span class="p">,</span> <span class="n">response</span><span class="o">.</span><span class="n">choices</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
<p>In these examples:</p>
|
||||
<blockquote>
|
||||
<div><p>The OpenAI client is used to send traffic directly through the Arch egress proxy to the LLM of your choice, such as OpenAI.
|
||||
The OpenAI client is configured to route traffic via Arch by setting the proxy to 127.0.0.1:51001, assuming Arch is
|
||||
running locally and bound to that address and port.</p>
|
||||
</div></blockquote>
|
||||
<p>This setup allows you to take advantage of Arch’s advanced traffic management features while interacting with LLM APIs like OpenAI.</p>
|
||||
<p>In these examples, the OpenAI client is used to send traffic directly through the Arch egress proxy to the LLM of your choice, such as OpenAI.
|
||||
The OpenAI client is configured to route traffic via Arch by setting the proxy to <code class="docutils literal notranslate"><span class="pre">127.0.0.1:51001</span></code>, assuming Arch is running locally and bound to that address and port.
|
||||
This setup allows you to take advantage of Arch’s advanced traffic management features while interacting with LLM APIs like OpenAI.</p>
|
||||
</section>
|
||||
</section>
|
||||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="model_serving.html">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="listener.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Model Serving
|
||||
Listener
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="request_lifecycle.html">
|
||||
Request Lifecycle
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="model_serving.html">
|
||||
Model Serving
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
|
|
@ -413,7 +390,7 @@ running locally and bound to that address and port.</p>
|
|||
<div class="sticky top-16 -mt-10 max-h-[calc(100vh-5rem)] overflow-y-auto pt-6 space-y-2"><p class="font-medium">On this page</p>
|
||||
<ul>
|
||||
<li><a :data-current="activeSection === '#messages'" class="reference internal" href="#messages">Messages</a></li>
|
||||
<li><a :data-current="activeSection === '#prompt-guardrails'" class="reference internal" href="#prompt-guardrails">Prompt Guardrails</a></li>
|
||||
<li><a :data-current="activeSection === '#prompt-guard'" class="reference internal" href="#prompt-guard">Prompt Guard</a></li>
|
||||
<li><a :data-current="activeSection === '#prompt-targets'" class="reference internal" href="#prompt-targets">Prompt Targets</a><ul>
|
||||
<li><a :data-current="activeSection === '#intent-detection-and-prompt-matching'" class="reference internal" href="#intent-detection-and-prompt-matching">Intent Detection and Prompt Matching:</a></li>
|
||||
<li><a :data-current="activeSection === '#agentic-apps-via-prompt-targets'" class="reference internal" href="#agentic-apps-via-prompt-targets">Agentic Apps via Prompt Targets</a></li>
|
||||
|
|
|
|||
|
|
@ -18,8 +18,8 @@
|
|||
<link href="./docs/concepts/tech_overview/request_lifecycle.html" rel="canonical"/>
|
||||
<link href="../../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../../search.html" rel="search" title="Search"/>
|
||||
<link href="../llm_provider.html" rel="next" title="LLM Provider"/>
|
||||
<link href="prompt.html" rel="prev" title="Prompt"/>
|
||||
<link href="error_target.html" rel="next" title="Error Target"/>
|
||||
<link href="model_serving.html" rel="prev" title="Model Serving"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
const userPreference = localStorage.getItem('darkMode');
|
||||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -199,7 +199,7 @@ lifecycle. The downstream and upstream HTTP/2 codec lives here.</p></li>
|
|||
forwarding prompts <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> and establishes the lifecycle of any <strong>upstream</strong> connection to a
|
||||
hosted endpoint that implements domain-specific business logic for incoming promots. This is where knowledge
|
||||
of targets and endpoint health, load balancing and connection pooling exists.</p></li>
|
||||
<li><p><a class="reference internal" href="model_serving.html#arch-model-serving"><span class="std std-ref">Model serving subsystem</span></a> which helps Arch make intelligent decisions about the
|
||||
<li><p><a class="reference internal" href="model_serving.html#model-serving"><span class="std std-ref">Model serving subsystem</span></a> which helps Arch make intelligent decisions about the
|
||||
incoming prompts. The model server is designed to call the purpose-built LLMs in Arch.</p></li>
|
||||
</ul>
|
||||
<p>The three subsystems are bridged with either the HTTP router filter, and the cluster manager subsystems of Envoy.</p>
|
||||
|
|
@ -214,7 +214,7 @@ enables scaling to very high core count CPUs.</p>
|
|||
<section id="configuration">
|
||||
<h2>Configuration<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#configuration" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#configuration'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>Today, only support a static bootstrap configuration file for simplicity today:</p>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="nt">listener</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
|
|
@ -224,67 +224,64 @@ enables scaling to very high core count CPUs.</p>
|
|||
</span><span id="line-8">
|
||||
</span><span id="line-9"><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-17">
|
||||
</span><span id="line-18"><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-19"><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-20"><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-21">
|
||||
</span><span id="line-22"><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</span><span id="line-23"><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</span><span id="line-24"><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||||
</span><span id="line-25"><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||||
</span><span id="line-26"><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="s">"Looks</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">you're</span><span class="nv"> </span><span class="s">curious</span><span class="nv"> </span><span class="s">about</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">abilities,</span><span class="nv"> </span><span class="s">but</span><span class="nv"> </span><span class="s">I</span><span class="nv"> </span><span class="s">can</span><span class="nv"> </span><span class="s">only</span><span class="nv"> </span><span class="s">provide</span><span class="nv"> </span><span class="s">assistance</span><span class="nv"> </span><span class="s">within</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">programmed</span><span class="nv"> </span><span class="s">parameters."</span>
|
||||
</span><span id="line-27">
|
||||
</span><span id="line-28"><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-29"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"reboot_network_device"</span>
|
||||
</span><span id="line-30"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Helps</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">operators</span><span class="nv"> </span><span class="s">perform</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">operations</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">rebooting</span><span class="nv"> </span><span class="s">a</span><span class="nv"> </span><span class="s">device."</span>
|
||||
</span><span id="line-19"><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-20">
|
||||
</span><span id="line-21"><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</span><span id="line-22"><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</span><span id="line-23"><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||||
</span><span id="line-24"><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||||
</span><span id="line-25"><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
|
||||
</span><span id="line-26">
|
||||
</span><span id="line-27"><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-28"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
|
||||
</span><span id="line-29"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-30"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">handel all scenarios that are question and answer in nature. Like summarization, information extraction, etc.</span>
|
||||
</span><span id="line-31"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-32"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</span><span id="line-33"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/action"</span>
|
||||
</span><span id="line-34"><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span id="line-35"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_id"</span>
|
||||
</span><span id="line-36"><span class="w"> </span><span class="c1"># additional type options include: int | float | bool | string | list | dict</span>
|
||||
</span><span id="line-37"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||||
</span><span id="line-38"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Identifier</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</span><span id="line-39"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-40"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"confirmation"</span>
|
||||
</span><span id="line-41"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||||
</span><span id="line-42"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Confirmation</span><span class="nv"> </span><span class="s">flag</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">proceed</span><span class="nv"> </span><span class="s">with</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</span><span id="line-43"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="s">"no"</span>
|
||||
</span><span id="line-44"><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">yes</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">no</span><span class="p p-Indicator">]</span>
|
||||
</span><span id="line-45">
|
||||
</span><span id="line-46"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"information_extraction"</span>
|
||||
</span><span id="line-47"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-48"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"This</span><span class="nv"> </span><span class="s">prompt</span><span class="nv"> </span><span class="s">handles</span><span class="nv"> </span><span class="s">all</span><span class="nv"> </span><span class="s">scenarios</span><span class="nv"> </span><span class="s">that</span><span class="nv"> </span><span class="s">are</span><span class="nv"> </span><span class="s">question</span><span class="nv"> </span><span class="s">and</span><span class="nv"> </span><span class="s">answer</span><span class="nv"> </span><span class="s">in</span><span class="nv"> </span><span class="s">nature.</span><span class="nv"> </span><span class="s">Like</span><span class="nv"> </span><span class="s">summarization,</span><span class="nv"> </span><span class="s">information</span><span class="nv"> </span><span class="s">extraction,</span><span class="nv"> </span><span class="s">etc."</span>
|
||||
</span><span id="line-49"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-50"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</span><span id="line-51"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/summary"</span>
|
||||
</span><span id="line-52"><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
|
||||
</span><span id="line-53"><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-54"><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
|
||||
</span><span id="line-55"><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-56"><span class="w"> </span><span class="no">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
|
||||
</span><span id="line-57">
|
||||
</span><span id="line-58"><span class="nt">error_target</span><span class="p">:</span>
|
||||
</span><span id="line-59"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-60"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
|
||||
</span><span id="line-61"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
|
||||
</span><span id="line-62">
|
||||
</span><span id="line-63"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-64"><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-65"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-66"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-67"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-68"><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-69"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
|
||||
</span><span id="line-70"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-71"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span><span id="line-33"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
|
||||
</span><span id="line-34"><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
|
||||
</span><span id="line-35"><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-36"><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
|
||||
</span><span id="line-37"><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
|
||||
</span><span id="line-38">
|
||||
</span><span id="line-39"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
|
||||
</span><span id="line-40"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot a specific network device</span>
|
||||
</span><span id="line-41"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-42"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</span><span id="line-43"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
|
||||
</span><span id="line-44"><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span id="line-45"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_id</span>
|
||||
</span><span id="line-46"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
|
||||
</span><span id="line-47"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Identifier of the network device to reboot.</span>
|
||||
</span><span id="line-48"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-49"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">confirmation</span>
|
||||
</span><span id="line-50"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">bool</span>
|
||||
</span><span id="line-51"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Confirmation flag to proceed with reboot.</span>
|
||||
</span><span id="line-52"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</span><span id="line-53"><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">true</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">false</span><span class="p p-Indicator">]</span>
|
||||
</span><span id="line-54">
|
||||
</span><span id="line-55"><span class="nt">error_target</span><span class="p">:</span>
|
||||
</span><span id="line-56"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-57"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
|
||||
</span><span id="line-58"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
|
||||
</span><span id="line-59">
|
||||
</span><span id="line-60"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-61"><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-62"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-63"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-64"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-65"><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-66"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
|
||||
</span><span id="line-67"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-68"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
|
@ -374,16 +371,16 @@ processing request headers and then finalized by the HCM during post-request pro
|
|||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
<div class="mr-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="prompt.html">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="model_serving.html">
|
||||
<svg class="mr-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="15 18 9 12 15 6"></polyline>
|
||||
</svg>
|
||||
Prompt
|
||||
Model Serving
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="../llm_provider.html">
|
||||
LLM Provider
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="error_target.html">
|
||||
Error Target
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -163,19 +163,19 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="listener.html#configure-listener">Configure Listener</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="model_serving.html">Model Serving</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#local-serving-cpu-moderate">Local Serving (CPU - Moderate)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#local-serving-gpu-fast">Local Serving (GPU- Fast)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#cloud-serving-gpu-blazing-fast">Cloud Serving (GPU - Blazing Fast)</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="prompt.html">Prompt</a><ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="prompt.html">Prompts</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html#messages">Messages</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html#prompt-guardrails">Prompt Guardrails</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html#prompt-guard">Prompt Guard</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html#prompt-targets">Prompt Targets</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html#prompting-llms">Prompting LLMs</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="model_serving.html">Model Serving</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#local-serving-cpu-moderate">Local Serving (CPU - Moderate)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#local-serving-gpu-fast">Local Serving (GPU - Fast)</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html#cloud-serving-gpu-blazing-fast">Cloud Serving (GPU - Blazing Fast)</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html#terminology">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html#network-topology">Network topology</a></li>
|
||||
|
|
@ -186,6 +186,12 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html#id1">Overview</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="error_target.html">Error Target</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html#key-concepts">Key Concepts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html#error-header-example">Error Header Example</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html#best-practices-and-tips">Best Practices and Tips</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -160,31 +160,31 @@ to keep things consistent in logs, traces and in code.</p>
|
|||
<p><strong>Upstream(Egress)</strong>: An upstream host that receives connections and prompts from Arch, and returns context or responses for a prompt</p>
|
||||
<a class="reference internal image-reference" href="../../_images/network-topology-ingress-egress.jpg"><img alt="../../_images/network-topology-ingress-egress.jpg" class="align-center" src="../../_images/network-topology-ingress-egress.jpg" style="width: 100%;"/>
|
||||
</a>
|
||||
<p><strong>Listener</strong>: A listener is a named network location (e.g., port, address, path etc.) that Arch listens on to process prompts
|
||||
<p><strong>Listener</strong>: A <a class="reference internal" href="listener.html#arch-overview-listeners"><span class="std std-ref">listener</span></a> is a named network location (e.g., port, address, path etc.) that Arch listens on to process prompts
|
||||
before forwarding them to your application server endpoints. rch enables you to configure one listener for downstream connections
|
||||
(like port 80, 443) and creates a separate internal listener for calls that initiate from your application code to LLMs.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>When you start Arch, you specify a listener address/port that you want to bind downstream. But, Arch uses are predefined port
|
||||
that you can use (<code class="docutils literal notranslate"><span class="pre">127.0.0.1:10000</span></code>) to proxy egress calls originating from your application to LLMs (API-based or hosted).
|
||||
For more details, check out <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">LLM providers</span></a></p>
|
||||
For more details, check out <a class="reference internal" href="../llm_provider.html#llm-provider"><span class="std std-ref">LLM provider</span></a>.</p>
|
||||
</div>
|
||||
<p><strong>Instance</strong>: An instance of the Arch gateway. When you start Arch it creates at most two processes. One to handle Layer 7
|
||||
networking operations (auth, tls, observability, etc) and the second process to serve models that enable it to make smart
|
||||
decisions on how to accept, handle and forward prompts. The second process is optional, as the model serving sevice could be
|
||||
hosted on a different network (an API call). But these two processes are considered a single instance of Arch.</p>
|
||||
<p><strong>Prompt Targets</strong>: Arch offers a primitive called <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> to help separate business logic from undifferentiated
|
||||
<p><strong>Prompt Target</strong>: Arch offers a primitive called <a class="reference internal" href="../prompt_target.html#prompt-target"><span class="std std-ref">prompt_target</span></a> to help separate business logic from undifferentiated
|
||||
work in building generative AI apps. Prompt targets are endpoints that receive prompts that are processed by Arch.
|
||||
For example, Arch enriches incoming prompts with metadata like knowing when a request is a follow-up or clarifying prompt
|
||||
so that you can build faster, more accurate retrieval (RAG) apps. To support agentic apps, like scheduling travel plans or
|
||||
sharing comments on a document - via prompts, Bolt uses its function calling abilities to extract critical information from
|
||||
the incoming prompt (or a set of prompts) needed by a downstream backend API or function call before calling it directly.</p>
|
||||
<p><strong>Error Targets</strong>: Error targets are those endpoints that receive forwarded errors from Arch when issues arise,
|
||||
<p><strong>Error Target</strong>: <a class="reference internal" href="error_target.html#error-target"><span class="std std-ref">Error targets</span></a> are those endpoints that receive forwarded errors from Arch when issues arise,
|
||||
such as failing to properly call a function/API, detecting violations of guardrails, or encountering other processing errors.
|
||||
These errors are communicated to the application via headers (X-Arch-[ERROR-TYPE]), allowing it to handle the errors gracefully
|
||||
These errors are communicated to the application via headers <code class="docutils literal notranslate"><span class="pre">X-Arch-[ERROR-TYPE]</span></code>, allowing it to handle the errors gracefully
|
||||
and take appropriate actions.</p>
|
||||
<p><strong>Model Serving</strong>: Arch is a set of <strong>two</strong> self-contained processes that are designed to run alongside your application servers
|
||||
(or on a separate hostconnected via a network).The <strong>model serving</strong> process helps Arch make intelligent decisions about the
|
||||
<p><strong>Model Serving</strong>: Arch is a set of <cite>two</cite> self-contained processes that are designed to run alongside your application servers
|
||||
(or on a separate hostconnected via a network).The <a class="reference internal" href="model_serving.html#model-serving"><span class="std std-ref">model serving</span></a> process helps Arch make intelligent decisions about the
|
||||
incoming prompts. The model server is designed to call the (fast) purpose-built LLMs in Arch.</p>
|
||||
</section>
|
||||
</div><div class="flex justify-between items-center pt-6 mt-12 border-t border-border gap-4">
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -161,7 +161,7 @@ threads perform filtering, and forwarding.</p>
|
|||
thread. All the functionality around prompt handling from a downstream client is handled in a separate worker thread.
|
||||
This allows the majority of Arch to be largely single threaded (embarrassingly parallel) with a small amount
|
||||
of more complex code handling coordination between the worker threads.</p>
|
||||
<p>Generally Arch is written to be 100% non-blocking.</p>
|
||||
<p>Generally, Arch is written to be 100% non-blocking.</p>
|
||||
<div class="admonition tip">
|
||||
<p class="admonition-title">Tip</p>
|
||||
<p>For most workloads we recommend configuring the number of worker threads to be equal to the number of
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -152,7 +152,7 @@
|
|||
</nav>
|
||||
<div id="content" role="main">
|
||||
<section id="overview">
|
||||
<h1>Overview<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#overview"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<span id="id1"></span><h1>Overview<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#overview"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h1>
|
||||
<p>Welcome to Arch, the intelligent prompt gateway designed to help developers build <strong>fast</strong>, <strong>secure</strong>, and <strong>personalized</strong> generative AI apps at ANY scale.
|
||||
In this documentation, you will learn how to quickly set up Arch to trigger API calls via prompts, apply prompt guardrails without writing any application-level logic,
|
||||
simplify the interaction with upstream LLMs, and improve observability all while simplifying your application development process.</p>
|
||||
|
|
@ -165,7 +165,7 @@ simplify the interaction with upstream LLMs, and improve observability all while
|
|||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
Overview</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-apps" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M1.5 3.25c0-.966.784-1.75 1.75-1.75h2.5c.966 0 1.75.784 1.75 1.75v2.5A1.75 1.75 0 0 1 5.75 7.5h-2.5A1.75 1.75 0 0 1 1.5 5.75Zm7 0c0-.966.784-1.75 1.75-1.75h2.5c.966 0 1.75.784 1.75 1.75v2.5a1.75 1.75 0 0 1-1.75 1.75h-2.5A1.75 1.75 0 0 1 8.5 5.75Zm-7 7c0-.966.784-1.75 1.75-1.75h2.5c.966 0 1.75.784 1.75 1.75v2.5a1.75 1.75 0 0 1-1.75 1.75h-2.5a1.75 1.75 0 0 1-1.75-1.75Zm7 0c0-.966.784-1.75 1.75-1.75h2.5c.966 0 1.75.784 1.75 1.75v2.5a1.75 1.75 0 0 1-1.75 1.75h-2.5a1.75 1.75 0 0 1-1.75-1.75ZM3.25 3a.25.25 0 0 0-.25.25v2.5c0 .138.112.25.25.25h2.5A.25.25 0 0 0 6 5.75v-2.5A.25.25 0 0 0 5.75 3Zm7 0a.25.25 0 0 0-.25.25v2.5c0 .138.112.25.25.25h2.5a.25.25 0 0 0 .25-.25v-2.5a.25.25 0 0 0-.25-.25Zm-7 7a.25.25 0 0 0-.25.25v2.5c0 .138.112.25.25.25h2.5a.25.25 0 0 0 .25-.25v-2.5a.25.25 0 0 0-.25-.25Zm7 0a.25.25 0 0 0-.25.25v2.5c0 .138.112.25.25.25h2.5a.25.25 0 0 0 .25-.25v-2.5a.25.25 0 0 0-.25-.25Z"></path></svg> Overview</div>
|
||||
<p class="sd-card-text">Overview of Arch and Doc navigation</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="overview.html"><span>overview.html</span></a></div>
|
||||
|
|
@ -174,7 +174,7 @@ Overview</div>
|
|||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
Intro to Arch</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-book" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path></svg> Intro to Arch</div>
|
||||
<p class="sd-card-text">Explore Arch’s features and developer workflow</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="intro_to_arch.html"><span>intro_to_arch.html</span></a></div>
|
||||
|
|
@ -183,7 +183,7 @@ Intro to Arch</div>
|
|||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
Quickstart</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-rocket" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M14.064 0h.186C15.216 0 16 .784 16 1.75v.186a8.752 8.752 0 0 1-2.564 6.186l-.458.459c-.314.314-.641.616-.979.904v3.207c0 .608-.315 1.172-.833 1.49l-2.774 1.707a.749.749 0 0 1-1.11-.418l-.954-3.102a1.214 1.214 0 0 1-.145-.125L3.754 9.816a1.218 1.218 0 0 1-.124-.145L.528 8.717a.749.749 0 0 1-.418-1.11l1.71-2.774A1.748 1.748 0 0 1 3.31 4h3.204c.288-.338.59-.665.904-.979l.459-.458A8.749 8.749 0 0 1 14.064 0ZM8.938 3.623h-.002l-.458.458c-.76.76-1.437 1.598-2.02 2.5l-1.5 2.317 2.143 2.143 2.317-1.5c.902-.583 1.74-1.26 2.499-2.02l.459-.458a7.25 7.25 0 0 0 2.123-5.127V1.75a.25.25 0 0 0-.25-.25h-.186a7.249 7.249 0 0 0-5.125 2.123ZM3.56 14.56c-.732.732-2.334 1.045-3.005 1.148a.234.234 0 0 1-.201-.064.234.234 0 0 1-.064-.201c.103-.671.416-2.273 1.15-3.003a1.502 1.502 0 1 1 2.12 2.12Zm6.94-3.935c-.088.06-.177.118-.266.175l-2.35 1.521.548 1.783 1.949-1.2a.25.25 0 0 0 .119-.213ZM3.678 8.116 5.2 5.766c.058-.09.117-.178.176-.266H3.309a.25.25 0 0 0-.213.119l-1.2 1.95ZM12 5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path></svg> Quickstart</div>
|
||||
<p class="sd-card-text">Learn how to quickly set up and integrate</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="quickstart.html"><span>quickstart.html</span></a></div>
|
||||
|
|
@ -200,28 +200,28 @@ Quickstart</div>
|
|||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
Tech Overview</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-package" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="m8.878.392 5.25 3.045c.54.314.872.89.872 1.514v6.098a1.75 1.75 0 0 1-.872 1.514l-5.25 3.045a1.75 1.75 0 0 1-1.756 0l-5.25-3.045A1.75 1.75 0 0 1 1 11.049V4.951c0-.624.332-1.201.872-1.514L7.122.392a1.75 1.75 0 0 1 1.756 0ZM7.875 1.69l-4.63 2.685L8 7.133l4.755-2.758-4.63-2.685a.248.248 0 0 0-.25 0ZM2.5 5.677v5.372c0 .09.047.171.125.216l4.625 2.683V8.432Zm6.25 8.271 4.625-2.683a.25.25 0 0 0 .125-.216V5.677L8.75 8.432Z"></path></svg> Tech Overview</div>
|
||||
<p class="sd-card-text">Learn about the technology stack</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../Concepts/tech_overview/tech_overview.html"><span>../Concepts/tech_overview/tech_overview.html</span></a></div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../concepts/tech_overview/tech_overview.html"><span>../concepts/tech_overview/tech_overview.html</span></a></div>
|
||||
</div>
|
||||
<div class="sd-col sd-d-flex-row docutils">
|
||||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
LLM Provider</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-webhook" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M5.5 4.25a2.25 2.25 0 0 1 4.5 0 .75.75 0 0 0 1.5 0 3.75 3.75 0 1 0-6.14 2.889l-2.272 4.258a.75.75 0 0 0 1.324.706L7 7.25a.75.75 0 0 0-.309-1.015A2.25 2.25 0 0 1 5.5 4.25Z"></path><path d="M7.364 3.607a.75.75 0 0 1 1.03.257l2.608 4.349a3.75 3.75 0 1 1-.628 6.785.75.75 0 0 1 .752-1.299 2.25 2.25 0 1 0-.033-3.88.75.75 0 0 1-1.03-.256L7.107 4.636a.75.75 0 0 1 .257-1.03Z"></path><path d="M2.9 8.776A.75.75 0 0 1 2.625 9.8 2.25 2.25 0 1 0 6 11.75a.75.75 0 0 1 .75-.751h5.5a.75.75 0 0 1 0 1.5H7.425a3.751 3.751 0 1 1-5.55-3.998.75.75 0 0 1 1.024.274Z"></path></svg> LLM Provider</div>
|
||||
<p class="sd-card-text">Explore Arch’s LLM integration options</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../Concepts/llm_provider.html"><span>../Concepts/llm_provider.html</span></a></div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../concepts/llm_provider.html"><span>../concepts/llm_provider.html</span></a></div>
|
||||
</div>
|
||||
<div class="sd-col sd-d-flex-row docutils">
|
||||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
Targets</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-workflow" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M0 1.75C0 .784.784 0 1.75 0h3.5C6.216 0 7 .784 7 1.75v3.5A1.75 1.75 0 0 1 5.25 7H4v4a1 1 0 0 0 1 1h4v-1.25C9 9.784 9.784 9 10.75 9h3.5c.966 0 1.75.784 1.75 1.75v3.5A1.75 1.75 0 0 1 14.25 16h-3.5A1.75 1.75 0 0 1 9 14.25v-.75H5A2.5 2.5 0 0 1 2.5 11V7h-.75A1.75 1.75 0 0 1 0 5.25Zm1.75-.25a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Zm9 9a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Z"></path></svg> Prompt Target</div>
|
||||
<p class="sd-card-text">Understand how Arch handles prompts</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../Concepts/prompt_target.html"><span>../Concepts/prompt_target.html</span></a></div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../concepts/prompt_target.html"><span>../concepts/prompt_target.html</span></a></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -235,16 +235,16 @@ Targets</div>
|
|||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
Prompt Guard</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-shield-check" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="m8.533.133 5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667l5.25-1.68a1.748 1.748 0 0 1 1.066 0Zm-.61 1.429.001.001-5.25 1.68a.251.251 0 0 0-.174.237V7c0 1.36.275 2.666 1.057 3.859.784 1.194 2.121 2.342 4.366 3.298a.196.196 0 0 0 .154 0c2.245-.957 3.582-2.103 4.366-3.297C13.225 9.666 13.5 8.358 13.5 7V3.48a.25.25 0 0 0-.174-.238l-5.25-1.68a.25.25 0 0 0-.153 0ZM11.28 6.28l-3.5 3.5a.75.75 0 0 1-1.06 0l-1.5-1.5a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l.97.97 2.97-2.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path></svg> Prompt Guard</div>
|
||||
<p class="sd-card-text">Instructions on securing and validating prompts</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/tech_overview/tech_overview.html"><span>../guides/tech_overview/tech_overview.html</span></a></div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/prompt_guard.html"><span>../guides/prompt_guard.html</span></a></div>
|
||||
</div>
|
||||
<div class="sd-col sd-d-flex-row docutils">
|
||||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
Function Calling</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-code-square" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25Zm7.47 3.97a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L10.69 8 9.22 6.53a.75.75 0 0 1 0-1.06ZM6.78 6.53 5.31 8l1.47 1.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-2-2a.75.75 0 0 1 0-1.06l2-2a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path></svg> Function Calling</div>
|
||||
<p class="sd-card-text">A guide to effective function calling</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/function_calling.html"><span>../guides/function_calling.html</span></a></div>
|
||||
|
|
@ -253,10 +253,10 @@ Function Calling</div>
|
|||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
Observability</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-issue-opened" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path></svg> Observability</div>
|
||||
<p class="sd-card-text">Learn to monitor and troubleshoot Arch</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/prompt_target.html"><span>../guides/prompt_target.html</span></a></div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../guides/observability/observability.html"><span>../guides/observability/observability.html</span></a></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -270,7 +270,7 @@ Observability</div>
|
|||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
Agentic Workflow</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-dependabot" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M5.75 7.5a.75.75 0 0 1 .75.75v1.5a.75.75 0 0 1-1.5 0v-1.5a.75.75 0 0 1 .75-.75Zm5.25.75a.75.75 0 0 0-1.5 0v1.5a.75.75 0 0 0 1.5 0v-1.5Z"></path><path d="M6.25 0h2A.75.75 0 0 1 9 .75V3.5h3.25a2.25 2.25 0 0 1 2.25 2.25V8h.75a.75.75 0 0 1 0 1.5h-.75v2.75a2.25 2.25 0 0 1-2.25 2.25h-8.5a2.25 2.25 0 0 1-2.25-2.25V9.5H.75a.75.75 0 0 1 0-1.5h.75V5.75A2.25 2.25 0 0 1 3.75 3.5H7.5v-2H6.25a.75.75 0 0 1 0-1.5ZM3 5.75v6.5c0 .414.336.75.75.75h8.5a.75.75 0 0 0 .75-.75v-6.5a.75.75 0 0 0-.75-.75h-8.5a.75.75 0 0 0-.75.75Z"></path></svg> Agentic Workflow</div>
|
||||
<p class="sd-card-text">Discover how to create and manage custom agents within Arch</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../build_with_arch/agent.html"><span>../build_with_arch/agent.html</span></a></div>
|
||||
|
|
@ -279,7 +279,7 @@ Agentic Workflow</div>
|
|||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||||
<div class="sd-card-body docutils">
|
||||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||||
RAG Application</div>
|
||||
<svg aria-hidden="true" class="sd-octicon sd-octicon-stack" height="1.0em" version="1.1" viewbox="0 0 16 16" width="1.0em"><path d="M7.122.392a1.75 1.75 0 0 1 1.756 0l5.003 2.902c.83.481.83 1.68 0 2.162L8.878 8.358a1.75 1.75 0 0 1-1.756 0L2.119 5.456a1.251 1.251 0 0 1 0-2.162ZM8.125 1.69a.248.248 0 0 0-.25 0l-4.63 2.685 4.63 2.685a.248.248 0 0 0 .25 0l4.63-2.685ZM1.601 7.789a.75.75 0 0 1 1.025-.273l5.249 3.044a.248.248 0 0 0 .25 0l5.249-3.044a.75.75 0 0 1 .752 1.298l-5.248 3.044a1.75 1.75 0 0 1-1.756 0L1.874 8.814A.75.75 0 0 1 1.6 7.789Zm0 3.5a.75.75 0 0 1 1.025-.273l5.249 3.044a.248.248 0 0 0 .25 0l5.249-3.044a.75.75 0 0 1 .752 1.298l-5.248 3.044a1.75 1.75 0 0 1-1.756 0l-5.248-3.044a.75.75 0 0 1-.273-1.025Z"></path></svg> RAG Application</div>
|
||||
<p class="sd-card-text">Integrate RAG for knowledge-driven responses</p>
|
||||
</div>
|
||||
<a class="sd-stretched-link sd-hide-link-text reference external" href="../build_with_arch/rag.html"><span>../build_with_arch/rag.html</span></a></div>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -196,53 +196,50 @@ Below is an example configuration to get you started, including:</p>
|
|||
<li><p><code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code>: Defines endpoints that handle specific types of prompts.</p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">error_target</span></code>: Specifies where to route errors for handling.</p></li>
|
||||
</ul>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
</span><span id="line-2"><span class="nt">listen</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
||||
</span><span id="line-5">
|
||||
</span><span id="line-6"><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2">
|
||||
</span><span id="line-3"><span class="nt">listen</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
</span><span id="line-5"><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
|
||||
</span><span id="line-6"><span class="w"> </span><span class="c1"># Defines how Arch should parse the content from application/json or text/pain Content-type in the http request</span>
|
||||
</span><span id="line-7"><span class="w"> </span><span class="nt">message_format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">huggingface</span>
|
||||
</span><span id="line-8">
|
||||
</span><span id="line-9"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-10"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-15">
|
||||
</span><span id="line-16"><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-17"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
|
||||
</span><span id="line-18"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">></span>
|
||||
</span><span id="line-19"><span class="w"> </span><span class="no">This prompt target handles user requests to reboot devices.</span>
|
||||
</span><span id="line-20"><span class="w"> </span><span class="no">It ensures that when users request to reboot specific devices or device groups, the system processes the reboot commands accurately.</span>
|
||||
</span><span id="line-21">
|
||||
</span><span id="line-22"><span class="w"> </span><span class="no">**Examples of user prompts:**</span>
|
||||
</span><span id="line-23">
|
||||
</span><span id="line-24"><span class="w"> </span><span class="no">- "Please reboot device 12345."</span>
|
||||
</span><span id="line-25"><span class="w"> </span><span class="no">- "Restart all devices in tenant group tenant-XYZ</span>
|
||||
</span><span id="line-26"><span class="w"> </span><span class="no">- "I need to reboot devices A, B, and C."</span>
|
||||
</span><span id="line-27">
|
||||
</span><span id="line-28"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
|
||||
</span><span id="line-29"><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span id="line-30"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_ids"</span>
|
||||
</span><span id="line-31"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span><span class="w"> </span><span class="c1"># Options: integer | float | list | dictionary | set</span>
|
||||
</span><span id="line-32"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"A</span><span class="nv"> </span><span class="s">list</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">identifiers</span><span class="nv"> </span><span class="s">(IDs)</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</span><span id="line-33"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</span><span id="line-34"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_group"</span>
|
||||
</span><span id="line-35"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">string</span><span class="w"> </span><span class="c1"># Options: string | integer | float | list | dictionary | set</span>
|
||||
</span><span id="line-36"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"The</span><span class="nv"> </span><span class="s">name</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">group</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</span><span id="line-37"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</span><span id="line-38">
|
||||
</span><span id="line-39"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-40"><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-41"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-42"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-43"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-44"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
|
||||
</span><span id="line-45"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-46"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span><span id="line-47"><span class="w"> </span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
</span><span id="line-9"><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-10"><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-12"><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-13"><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-14"><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-15"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-16"><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-17">
|
||||
</span><span id="line-18"><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-19"><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-20">
|
||||
</span><span id="line-21"><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-22"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_devices</span>
|
||||
</span><span id="line-23"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot specific devices or device groups</span>
|
||||
</span><span id="line-24">
|
||||
</span><span id="line-25"><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/device_reboot</span>
|
||||
</span><span id="line-26"><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span id="line-27"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_ids</span>
|
||||
</span><span id="line-28"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">list</span>
|
||||
</span><span id="line-29"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">A list of device identifiers (IDs) to reboot.</span>
|
||||
</span><span id="line-30"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</span><span id="line-31"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_group</span>
|
||||
</span><span id="line-32"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
|
||||
</span><span id="line-33"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">The name of the device group to reboot</span>
|
||||
</span><span id="line-34"><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</span><span id="line-35">
|
||||
</span><span id="line-36"><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-37"><span class="nt">endpoints</span><span class="p">:</span>
|
||||
</span><span id="line-38"><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
|
||||
</span><span id="line-39"><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-40"><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-41"><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-42"><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
|
||||
</span><span id="line-43"><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-44"><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
|
@ -256,7 +253,7 @@ Below is an example configuration to get you started, including:</p>
|
|||
<h2>Next Steps<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#next-steps" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#next-steps'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
<p>Congratulations! You’ve successfully set up Arch and made your first prompt-based request. To further enhance your GenAI applications, explore the following resources:</p>
|
||||
<ul class="simple">
|
||||
<li><p>Full Documentation: Comprehensive guides and references.</p></li>
|
||||
<li><p><a class="reference internal" href="overview.html#overview"><span class="std std-ref">Full Documentation</span></a>: Comprehensive guides and references.</p></li>
|
||||
<li><p><a class="reference external" href="https://github.com/katanemo/arch" rel="nofollow noopener">GitHub Repository<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>: Access the source code, contribute, and track updates.</p></li>
|
||||
<li><p><a class="reference external" href="https://github.com/katanemo/arch#contact" rel="nofollow noopener">Support<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a>: Get help and connect with the Arch community .</p></li>
|
||||
</ul>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -248,7 +248,7 @@ Achieving performance on par with GPT-4, these models set a new benchmark in the
|
|||
<p>Here’s a step-by-step guide to configuring function calling within your Arch setup:</p>
|
||||
<section id="step-1-define-the-function">
|
||||
<h3>Step 1: Define the Function<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-1-define-the-function" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-1-define-the-function'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Create or identify the backend function you want Arch to call. This could be an API endpoint, a script, or any other executable backend logic.</p>
|
||||
<p>First, create or identify the backend function you want Arch to call. This could be an API endpoint, a script, or any other executable backend logic.</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id3">
|
||||
<div class="code-block-caption"><span class="caption-text">Example Function</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="kn">import</span> <span class="nn">requests</span>
|
||||
|
|
@ -277,9 +277,10 @@ Achieving performance on par with GPT-4, these models set a new benchmark in the
|
|||
</section>
|
||||
<section id="step-2-configure-prompt-targets">
|
||||
<h3>Step 2: Configure Prompt Targets<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-2-configure-prompt-targets" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-2-configure-prompt-targets'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Map the function to a prompt target, defining the intent and parameters that Arch will extract from the user’s prompt.</p>
|
||||
<p>Next, map the function to a prompt target, defining the intent and parameters that Arch will extract from the user’s prompt.
|
||||
Specify the parameters your function needs and how Arch should interpret these.</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id4">
|
||||
<div class="code-block-caption"><span class="caption-text">Example Config</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="code-block-caption"><span class="caption-text">Prompt Target Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id4"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">get_weather</span>
|
||||
</span><span id="line-3"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Get the current weather for a location</span>
|
||||
|
|
@ -299,9 +300,10 @@ Achieving performance on par with GPT-4, these models set a new benchmark in the
|
|||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="step-3-validate-parameters">
|
||||
<h3>Step 3: Validate Parameters<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-3-validate-parameters" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-3-validate-parameters'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Arch will validate parameters and ensure that the required parameters (e.g., location) are present in the prompt, and add validation rules if necessary.
|
||||
<section id="step-3-arch-takes-over">
|
||||
<h3>Step 3: Arch Takes Over<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-3-arch-takes-over" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-3-arch-takes-over'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Once you have defined the functions and configured the prompt targets, Arch takes care of the remaining work.
|
||||
It will automatically validate parameters validate parameters and ensure that the required parameters (e.g., location) are present in the prompt, and add validation rules if necessary.
|
||||
Here is ane example validation schema using the <a class="reference external" href="https://json-schema.org/docs" rel="nofollow noopener">jsonschema<svg fill="currentColor" height="1em" stroke="none" viewbox="0 96 960 960" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M188 868q-11-11-11-28t11-28l436-436H400q-17 0-28.5-11.5T360 336q0-17 11.5-28.5T400 296h320q17 0 28.5 11.5T760 336v320q0 17-11.5 28.5T720 696q-17 0-28.5-11.5T680 656V432L244 868q-11 11-28 11t-28-11Z"></path></svg></a> library</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id5">
|
||||
<div class="code-block-caption"><span class="caption-text">Example Validation Schema</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id5"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
|
|
@ -354,11 +356,8 @@ Here is ane example validation schema using the <a class="reference external" hr
|
|||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="step-4-execute-and-return-the-response">
|
||||
<h3>Step 4: Execute and Return the Response<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#step-4-execute-and-return-the-response" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#step-4-execute-and-return-the-response'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Once the function is called, format the response and send it back to Arch-Function.
|
||||
Next, Arch-Function provides users with coherent and user-friendly responses.</p>
|
||||
<p>Once the functions are called, Arch formats the response and deliver back to users.
|
||||
By completing these setup steps, you enable Arch to manage the process from validation to response, ensuring users receive consistent, reliable results.</p>
|
||||
</section>
|
||||
</section>
|
||||
<section id="example-use-cases">
|
||||
|
|
@ -415,8 +414,7 @@ Next, Arch-Function provides users with coherent and user-friendly responses.</p
|
|||
<li><a :data-current="activeSection === '#implementing-function-calling'" class="reference internal" href="#implementing-function-calling">Implementing Function Calling</a><ul>
|
||||
<li><a :data-current="activeSection === '#step-1-define-the-function'" class="reference internal" href="#step-1-define-the-function">Step 1: Define the Function</a></li>
|
||||
<li><a :data-current="activeSection === '#step-2-configure-prompt-targets'" class="reference internal" href="#step-2-configure-prompt-targets">Step 2: Configure Prompt Targets</a></li>
|
||||
<li><a :data-current="activeSection === '#step-3-validate-parameters'" class="reference internal" href="#step-3-validate-parameters">Step 3: Validate Parameters</a></li>
|
||||
<li><a :data-current="activeSection === '#step-4-execute-and-return-the-response'" class="reference internal" href="#step-4-execute-and-return-the-response">Step 4: Execute and Return the Response</a></li>
|
||||
<li><a :data-current="activeSection === '#step-3-arch-takes-over'" class="reference internal" href="#step-3-arch-takes-over">Step 3: Arch Takes Over</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a :data-current="activeSection === '#example-use-cases'" class="reference internal" href="#example-use-cases">Example Use Cases</a></li>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -101,9 +101,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +129,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -212,6 +212,19 @@ Arch-Guard is designed to address this challenge.</p>
|
|||
It excels at detecting explicitly malicious prompts and assessing toxic content, providing an essential layer of security for LLM applications.</p>
|
||||
<p>By embedding Arch-Guard within the Arch architecture, we empower developers to build robust, LLM-powered applications while prioritizing security and safety. With Arch-Guard, you can navigate the complexities of prompt management with confidence, knowing you have a reliable defense against malicious input.</p>
|
||||
</section>
|
||||
<section id="example-configuration">
|
||||
<h3>Example Configuration<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#example-configuration" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#example-configuration'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h3>
|
||||
<p>Here is an example of using Arch-Guard in Arch:</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id3">
|
||||
<div class="code-block-caption"><span class="caption-text">Arch-Guard Example Configuration</span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id3"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos">1</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</span><span id="line-2"><span class="linenos">2</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||||
</span><span id="line-3"><span class="linenos">3</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="linenos">4</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="how-arch-guard-works">
|
||||
<h2>How Arch-Guard Works<a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#how-arch-guard-works" x-intersect.margin.0%.0%.-70%.0%="activeSection = '#how-arch-guard-works'"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></h2>
|
||||
|
|
@ -281,6 +294,7 @@ By implementing Prompt Guard, developers can provide a robust layer of input val
|
|||
<li><a :data-current="activeSection === '#why-prompt-guard'" class="reference internal" href="#why-prompt-guard">Why Prompt Guard</a></li>
|
||||
<li><a :data-current="activeSection === '#arch-guard'" class="reference internal" href="#arch-guard">Arch-Guard</a><ul>
|
||||
<li><a :data-current="activeSection === '#what-is-arch-guard'" class="reference internal" href="#what-is-arch-guard">What Is Arch-Guard</a></li>
|
||||
<li><a :data-current="activeSection === '#example-configuration'" class="reference internal" href="#example-configuration">Example Configuration</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a :data-current="activeSection === '#how-arch-guard-works'" class="reference internal" href="#how-arch-guard-works">How Arch-Guard Works</a></li>
|
||||
|
|
|
|||
|
|
@ -100,9 +100,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -127,7 +128,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -175,9 +175,10 @@ Concepts</label><div class="sd-tab-content docutils">
|
|||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -220,7 +221,6 @@ Resources</label><div class="sd-tab-content docutils">
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
BIN
objects.inv
BIN
objects.inv
Binary file not shown.
|
|
@ -18,7 +18,6 @@
|
|||
<link href="./docs/resources/configuration_reference.html" rel="canonical"/>
|
||||
<link href="../_static/favicon.ico" rel="icon"/>
|
||||
<link href="../search.html" rel="search" title="Search"/>
|
||||
<link href="error_target.html" rel="next" title="Error Targets"/>
|
||||
<link href="../build_with_arch/rag.html" rel="prev" title="RAG Application"/>
|
||||
<script>
|
||||
<!-- Prevent Flash of wrong theme -->
|
||||
|
|
@ -101,9 +100,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -128,7 +128,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="#">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
@ -160,7 +159,7 @@ options, etc). Our belief that the simple things, should be simple. So we offert
|
|||
that they can spend more of their time in building features unique to their AI experience.</p>
|
||||
<div class="literal-block-wrapper docutils container" id="id1">
|
||||
<div class="code-block-caption"><span class="caption-text"><a class="reference download internal" download="" href="../_downloads/ca9d3b7116524473d8adbde7cf15d167/arch_config_full_reference.yaml"><code class="xref download docutils literal notranslate"><span class="pre">Arch</span> <span class="pre">Configuration</span> <span class="pre">-</span> <span class="pre">Full</span> <span class="pre">Reference</span></code></a></span><a @click.prevent="window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label="Copy link to this element" class="headerlink" data-tooltip="Copy link to this element" href="#id1"><svg height="1em" viewbox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z"></path></svg></a></div>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
||||
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.1</span>
|
||||
</span><span id="line-2"><span class="linenos"> 2</span>
|
||||
</span><span id="line-3"><span class="linenos"> 3</span><span class="nt">listener</span><span class="p">:</span>
|
||||
</span><span id="line-4"><span class="linenos"> 4</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span><span class="w"> </span><span class="c1"># or 127.0.0.1</span>
|
||||
|
|
@ -170,9 +169,9 @@ that they can spend more of their time in building features unique to their AI e
|
|||
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="nt">common_tls_context</span><span class="p">:</span><span class="w"> </span><span class="c1"># If you configure port 443, you'll need to update the listener with your TLS certificates</span>
|
||||
</span><span id="line-9"><span class="linenos"> 9</span><span class="w"> </span><span class="nt">tls_certificates</span><span class="p">:</span>
|
||||
</span><span id="line-10"><span class="linenos"> 10</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">certificate_chain</span><span class="p">:</span>
|
||||
</span><span id="line-11"><span class="linenos"> 11</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="s">"/etc/certs/cert.pem"</span>
|
||||
</span><span id="line-11"><span class="linenos"> 11</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/etc/certs/cert.pem</span>
|
||||
</span><span id="line-12"><span class="linenos"> 12</span><span class="w"> </span><span class="nt">private_key</span><span class="p">:</span>
|
||||
</span><span id="line-13"><span class="linenos"> 13</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="s">"/etc/certs/key.pem"</span>
|
||||
</span><span id="line-13"><span class="linenos"> 13</span><span class="w"> </span><span class="nt">filename</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/etc/certs/key.pem</span>
|
||||
</span><span id="line-14"><span class="linenos"> 14</span>
|
||||
</span><span id="line-15"><span class="linenos"> 15</span><span class="c1"># Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.</span>
|
||||
</span><span id="line-16"><span class="linenos"> 16</span><span class="nt">endpoints</span><span class="p">:</span>
|
||||
|
|
@ -180,42 +179,42 @@ that they can spend more of their time in building features unique to their AI e
|
|||
</span><span id="line-18"><span class="linenos"> 18</span><span class="w"> </span><span class="c1"># value could be ip address or a hostname with port</span>
|
||||
</span><span id="line-19"><span class="linenos"> 19</span><span class="w"> </span><span class="c1"># this could also be a list of endpoints for load balancing</span>
|
||||
</span><span id="line-20"><span class="linenos"> 20</span><span class="w"> </span><span class="c1"># for example endpoint: [ ip1:port, ip2:port ]</span>
|
||||
</span><span id="line-21"><span class="linenos"> 21</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:80"</span>
|
||||
</span><span id="line-21"><span class="linenos"> 21</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
|
||||
</span><span id="line-22"><span class="linenos"> 22</span><span class="w"> </span><span class="c1"># max time to wait for a connection to be established</span>
|
||||
</span><span id="line-23"><span class="linenos"> 23</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
|
||||
</span><span id="line-24"><span class="linenos"> 24</span>
|
||||
</span><span id="line-25"><span class="linenos"> 25</span><span class="w"> </span><span class="nt">mistral_local</span><span class="p">:</span>
|
||||
</span><span id="line-26"><span class="linenos"> 26</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"127.0.0.1:8001"</span>
|
||||
</span><span id="line-26"><span class="linenos"> 26</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:8001</span>
|
||||
</span><span id="line-27"><span class="linenos"> 27</span>
|
||||
</span><span id="line-28"><span class="linenos"> 28</span><span class="w"> </span><span class="nt">error_target</span><span class="p">:</span>
|
||||
</span><span id="line-29"><span class="linenos"> 29</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"error_target_1"</span>
|
||||
</span><span id="line-29"><span class="linenos"> 29</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
|
||||
</span><span id="line-30"><span class="linenos"> 30</span>
|
||||
</span><span id="line-31"><span class="linenos"> 31</span><span class="c1"># Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way</span>
|
||||
</span><span id="line-32"><span class="linenos"> 32</span><span class="nt">llm_providers</span><span class="p">:</span>
|
||||
</span><span id="line-33"><span class="linenos"> 33</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
||||
</span><span id="line-34"><span class="linenos"> 34</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"openai"</span>
|
||||
</span><span id="line-35"><span class="linenos"> 35</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
|
||||
</span><span id="line-33"><span class="linenos"> 33</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OpenAI</span>
|
||||
</span><span id="line-34"><span class="linenos"> 34</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai</span>
|
||||
</span><span id="line-35"><span class="linenos"> 35</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">OPENAI_API_KEY</span>
|
||||
</span><span id="line-36"><span class="linenos"> 36</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
||||
</span><span id="line-37"><span class="linenos"> 37</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-38"><span class="linenos"> 38</span><span class="w"> </span><span class="nt">stream</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-39"><span class="linenos"> 39</span><span class="w"> </span><span class="nt">rate_limits</span><span class="p">:</span>
|
||||
</span><span id="line-40"><span class="linenos"> 40</span><span class="w"> </span><span class="nt">selector</span><span class="p">:</span><span class="w"> </span><span class="c1">#optional headers, to add rate limiting based on http headers like JWT tokens or API keys</span>
|
||||
</span><span id="line-41"><span class="linenos"> 41</span><span class="w"> </span><span class="nt">http_header</span><span class="p">:</span>
|
||||
</span><span id="line-42"><span class="linenos"> 42</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Authorization"</span>
|
||||
</span><span id="line-42"><span class="linenos"> 42</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Authorization</span>
|
||||
</span><span id="line-43"><span class="linenos"> 43</span><span class="w"> </span><span class="nt">value</span><span class="p">:</span><span class="w"> </span><span class="s">""</span><span class="w"> </span><span class="c1"># Empty value means each separate value has a separate limit</span>
|
||||
</span><span id="line-44"><span class="linenos"> 44</span><span class="w"> </span><span class="nt">limit</span><span class="p">:</span>
|
||||
</span><span id="line-45"><span class="linenos"> 45</span><span class="w"> </span><span class="nt">tokens</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100000</span><span class="w"> </span><span class="c1"># Tokens per unit</span>
|
||||
</span><span id="line-46"><span class="linenos"> 46</span><span class="w"> </span><span class="nt">unit</span><span class="p">:</span><span class="w"> </span><span class="s">"minute"</span>
|
||||
</span><span id="line-46"><span class="linenos"> 46</span><span class="w"> </span><span class="nt">unit</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">minute</span>
|
||||
</span><span id="line-47"><span class="linenos"> 47</span>
|
||||
</span><span id="line-48"><span class="linenos"> 48</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral8x7b"</span>
|
||||
</span><span id="line-49"><span class="linenos"> 49</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral"</span>
|
||||
</span><span id="line-50"><span class="linenos"> 50</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_API_KEY</span>
|
||||
</span><span id="line-51"><span class="linenos"> 51</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral-8x7b"</span>
|
||||
</span><span id="line-48"><span class="linenos"> 48</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Mistral8x7b</span>
|
||||
</span><span id="line-49"><span class="linenos"> 49</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral</span>
|
||||
</span><span id="line-50"><span class="linenos"> 50</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">MISTRAL_API_KEY</span>
|
||||
</span><span id="line-51"><span class="linenos"> 51</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral-8x7b</span>
|
||||
</span><span id="line-52"><span class="linenos"> 52</span>
|
||||
</span><span id="line-53"><span class="linenos"> 53</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"MistralLocal7b"</span>
|
||||
</span><span id="line-54"><span class="linenos"> 54</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="s">"local"</span>
|
||||
</span><span id="line-55"><span class="linenos"> 55</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral-7b-instruct"</span>
|
||||
</span><span id="line-56"><span class="linenos"> 56</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="s">"mistral_local"</span>
|
||||
</span><span id="line-53"><span class="linenos"> 53</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">MistralLocal7b</span>
|
||||
</span><span id="line-54"><span class="linenos"> 54</span><span class="w"> </span><span class="nt">provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">local</span>
|
||||
</span><span id="line-55"><span class="linenos"> 55</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral-7b-instruct</span>
|
||||
</span><span id="line-56"><span class="linenos"> 56</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral_local</span>
|
||||
</span><span id="line-57"><span class="linenos"> 57</span>
|
||||
</span><span id="line-58"><span class="linenos"> 58</span><span class="c1"># provides a way to override default settings for the arch system</span>
|
||||
</span><span id="line-59"><span class="linenos"> 59</span><span class="nt">overrides</span><span class="p">:</span>
|
||||
|
|
@ -224,51 +223,48 @@ that they can spend more of their time in building features unique to their AI e
|
|||
</span><span id="line-62"><span class="linenos"> 62</span><span class="w"> </span><span class="nt">prompt_target_intent_matching_threshold</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.60</span>
|
||||
</span><span id="line-63"><span class="linenos"> 63</span>
|
||||
</span><span id="line-64"><span class="linenos"> 64</span><span class="c1"># default system prompt used by all prompt targets</span>
|
||||
</span><span id="line-65"><span class="linenos"> 65</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-66"><span class="linenos"> 66</span><span class="w"> </span><span class="no">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-67"><span class="linenos"> 67</span>
|
||||
</span><span id="line-68"><span class="linenos"> 68</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</span><span id="line-69"><span class="linenos"> 69</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</span><span id="line-70"><span class="linenos"> 70</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||||
</span><span id="line-71"><span class="linenos"> 71</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||||
</span><span id="line-72"><span class="linenos"> 72</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="s">"Looks</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">you're</span><span class="nv"> </span><span class="s">curious</span><span class="nv"> </span><span class="s">about</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">abilities,</span><span class="nv"> </span><span class="s">but</span><span class="nv"> </span><span class="s">I</span><span class="nv"> </span><span class="s">can</span><span class="nv"> </span><span class="s">only</span><span class="nv"> </span><span class="s">provide</span><span class="nv"> </span><span class="s">assistance</span><span class="nv"> </span><span class="s">within</span><span class="nv"> </span><span class="s">my</span><span class="nv"> </span><span class="s">programmed</span><span class="nv"> </span><span class="s">parameters."</span>
|
||||
</span><span id="line-73"><span class="linenos"> 73</span>
|
||||
</span><span id="line-74"><span class="linenos"> 74</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-75"><span class="linenos"> 75</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"reboot_network_device"</span>
|
||||
</span><span id="line-76"><span class="linenos"> 76</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Helps</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">operators</span><span class="nv"> </span><span class="s">perform</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">operations</span><span class="nv"> </span><span class="s">like</span><span class="nv"> </span><span class="s">rebooting</span><span class="nv"> </span><span class="s">a</span><span class="nv"> </span><span class="s">device."</span>
|
||||
</span><span id="line-65"><span class="linenos"> 65</span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.</span>
|
||||
</span><span id="line-66"><span class="linenos"> 66</span>
|
||||
</span><span id="line-67"><span class="linenos"> 67</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
||||
</span><span id="line-68"><span class="linenos"> 68</span><span class="w"> </span><span class="nt">input_guards</span><span class="p">:</span>
|
||||
</span><span id="line-69"><span class="linenos"> 69</span><span class="w"> </span><span class="nt">jailbreak</span><span class="p">:</span>
|
||||
</span><span id="line-70"><span class="linenos"> 70</span><span class="w"> </span><span class="nt">on_exception</span><span class="p">:</span>
|
||||
</span><span id="line-71"><span class="linenos"> 71</span><span class="w"> </span><span class="nt">message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.</span>
|
||||
</span><span id="line-72"><span class="linenos"> 72</span>
|
||||
</span><span id="line-73"><span class="linenos"> 73</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
||||
</span><span id="line-74"><span class="linenos"> 74</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
|
||||
</span><span id="line-75"><span class="linenos"> 75</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-76"><span class="linenos"> 76</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">handel all scenarios that are question and answer in nature. Like summarization, information extraction, etc.</span>
|
||||
</span><span id="line-77"><span class="linenos"> 77</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-78"><span class="linenos"> 78</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</span><span id="line-79"><span class="linenos"> 79</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/action"</span>
|
||||
</span><span id="line-80"><span class="linenos"> 80</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span id="line-81"><span class="linenos"> 81</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"device_id"</span>
|
||||
</span><span id="line-82"><span class="linenos"> 82</span><span class="w"> </span><span class="c1"># additional type options include: int | float | bool | string | list | dict</span>
|
||||
</span><span id="line-83"><span class="linenos"> 83</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||||
</span><span id="line-84"><span class="linenos"> 84</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Identifier</span><span class="nv"> </span><span class="s">of</span><span class="nv"> </span><span class="s">the</span><span class="nv"> </span><span class="s">network</span><span class="nv"> </span><span class="s">device</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</span><span id="line-85"><span class="linenos"> 85</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-86"><span class="linenos"> 86</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"confirmation"</span>
|
||||
</span><span id="line-87"><span class="linenos"> 87</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="s">"string"</span>
|
||||
</span><span id="line-88"><span class="linenos"> 88</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"Confirmation</span><span class="nv"> </span><span class="s">flag</span><span class="nv"> </span><span class="s">to</span><span class="nv"> </span><span class="s">proceed</span><span class="nv"> </span><span class="s">with</span><span class="nv"> </span><span class="s">reboot."</span>
|
||||
</span><span id="line-89"><span class="linenos"> 89</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="s">"no"</span>
|
||||
</span><span id="line-90"><span class="linenos"> 90</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">yes</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">no</span><span class="p p-Indicator">]</span>
|
||||
</span><span id="line-91"><span class="linenos"> 91</span>
|
||||
</span><span id="line-92"><span class="linenos"> 92</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"information_extraction"</span>
|
||||
</span><span id="line-93"><span class="linenos"> 93</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-94"><span class="linenos"> 94</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s">"This</span><span class="nv"> </span><span class="s">prompt</span><span class="nv"> </span><span class="s">handles</span><span class="nv"> </span><span class="s">all</span><span class="nv"> </span><span class="s">scenarios</span><span class="nv"> </span><span class="s">that</span><span class="nv"> </span><span class="s">are</span><span class="nv"> </span><span class="s">question</span><span class="nv"> </span><span class="s">and</span><span class="nv"> </span><span class="s">answer</span><span class="nv"> </span><span class="s">in</span><span class="nv"> </span><span class="s">nature.</span><span class="nv"> </span><span class="s">Like</span><span class="nv"> </span><span class="s">summarization,</span><span class="nv"> </span><span class="s">information</span><span class="nv"> </span><span class="s">extraction,</span><span class="nv"> </span><span class="s">etc."</span>
|
||||
</span><span id="line-95"><span class="linenos"> 95</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-96"><span class="linenos"> 96</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</span><span id="line-97"><span class="linenos"> 97</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="s">"/agent/summary"</span>
|
||||
</span><span id="line-98"><span class="linenos"> 98</span><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
|
||||
</span><span id="line-99"><span class="linenos"> 99</span><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-100"><span class="linenos">100</span><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
|
||||
</span><span id="line-101"><span class="linenos">101</span><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">|</span>
|
||||
</span><span id="line-102"><span class="linenos">102</span><span class="w"> </span><span class="no">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
|
||||
</span><span id="line-103"><span class="linenos">103</span>
|
||||
</span><span id="line-104"><span class="linenos">104</span><span class="nt">error_target</span><span class="p">:</span>
|
||||
</span><span id="line-105"><span class="linenos">105</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-106"><span class="linenos">106</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
|
||||
</span><span id="line-107"><span class="linenos">107</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
|
||||
</span><span id="line-108"><span class="linenos">108</span>
|
||||
</span><span id="line-109"><span class="linenos">109</span><span class="nt">tracing</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100</span><span class="w"> </span><span class="c1">#sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.</span>
|
||||
</span><span id="line-79"><span class="linenos"> 79</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
|
||||
</span><span id="line-80"><span class="linenos"> 80</span><span class="w"> </span><span class="c1"># Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM</span>
|
||||
</span><span id="line-81"><span class="linenos"> 81</span><span class="w"> </span><span class="nt">auto_llm_dispatch_on_response</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-82"><span class="linenos"> 82</span><span class="w"> </span><span class="c1"># override system prompt for this prompt target</span>
|
||||
</span><span id="line-83"><span class="linenos"> 83</span><span class="w"> </span><span class="nt">system_prompt</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a helpful information extraction assistant. Use the information that is provided to you.</span>
|
||||
</span><span id="line-84"><span class="linenos"> 84</span>
|
||||
</span><span id="line-85"><span class="linenos"> 85</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
|
||||
</span><span id="line-86"><span class="linenos"> 86</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Reboot a specific network device</span>
|
||||
</span><span id="line-87"><span class="linenos"> 87</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-88"><span class="linenos"> 88</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">app_server</span>
|
||||
</span><span id="line-89"><span class="linenos"> 89</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
|
||||
</span><span id="line-90"><span class="linenos"> 90</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
||||
</span><span id="line-91"><span class="linenos"> 91</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device_id</span>
|
||||
</span><span id="line-92"><span class="linenos"> 92</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">str</span>
|
||||
</span><span id="line-93"><span class="linenos"> 93</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Identifier of the network device to reboot.</span>
|
||||
</span><span id="line-94"><span class="linenos"> 94</span><span class="w"> </span><span class="nt">required</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
||||
</span><span id="line-95"><span class="linenos"> 95</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">confirmation</span>
|
||||
</span><span id="line-96"><span class="linenos"> 96</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">bool</span>
|
||||
</span><span id="line-97"><span class="linenos"> 97</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Confirmation flag to proceed with reboot.</span>
|
||||
</span><span id="line-98"><span class="linenos"> 98</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
</span><span id="line-99"><span class="linenos"> 99</span><span class="w"> </span><span class="nt">enum</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">true</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">false</span><span class="p p-Indicator">]</span>
|
||||
</span><span id="line-100"><span class="linenos">100</span>
|
||||
</span><span id="line-101"><span class="linenos">101</span><span class="nt">error_target</span><span class="p">:</span>
|
||||
</span><span id="line-102"><span class="linenos">102</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span>
|
||||
</span><span id="line-103"><span class="linenos">103</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">error_target_1</span>
|
||||
</span><span id="line-104"><span class="linenos">104</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/error</span>
|
||||
</span><span id="line-105"><span class="linenos">105</span>
|
||||
</span><span id="line-106"><span class="linenos">106</span><span class="nt">tracing</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100</span><span class="w"> </span><span class="c1">#sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.</span>
|
||||
</span></code></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -282,14 +278,6 @@ that they can spend more of their time in building features unique to their AI e
|
|||
RAG Application
|
||||
</a>
|
||||
</div>
|
||||
<div class="ml-auto">
|
||||
<a class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors border border-input hover:bg-accent hover:text-accent-foreground py-2 px-4" href="error_target.html">
|
||||
Error Targets
|
||||
<svg class="ml-2 h-4 w-4" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
|
||||
<polyline points="9 18 15 12 9 6"></polyline>
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</div></div>
|
||||
</main>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -135,9 +135,10 @@
|
|||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/terminology.html">Terminology</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/threading_model.html">Threading Model</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/listener.html">Listener</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompts</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/model_serving.html">Model Serving</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/prompt.html">Prompt</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/request_lifecycle.html">Request Lifecycle</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="concepts/tech_overview/error_target.html">Error Target</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="concepts/llm_provider.html">LLM Provider</a></li>
|
||||
|
|
@ -162,7 +163,6 @@
|
|||
<p class="caption" role="heading"><span class="caption-text">Resources</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="resources/configuration_reference.html">Configuration Reference</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="resources/error_target.html">Error Targets</a></li>
|
||||
</ul>
|
||||
|
||||
</nav>
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1,2 +1,2 @@
|
|||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>./docsbuild_with_arch/agent.html</loc></url><url><loc>./docsbuild_with_arch/rag.html</loc></url><url><loc>./docsconcepts/llm_provider.html</loc></url><url><loc>./docsconcepts/prompt_target.html</loc></url><url><loc>./docsconcepts/tech_overview/listener.html</loc></url><url><loc>./docsconcepts/tech_overview/model_serving.html</loc></url><url><loc>./docsconcepts/tech_overview/prompt.html</loc></url><url><loc>./docsconcepts/tech_overview/request_lifecycle.html</loc></url><url><loc>./docsconcepts/tech_overview/tech_overview.html</loc></url><url><loc>./docsconcepts/tech_overview/terminology.html</loc></url><url><loc>./docsconcepts/tech_overview/threading_model.html</loc></url><url><loc>./docsget_started/intro_to_arch.html</loc></url><url><loc>./docsget_started/overview.html</loc></url><url><loc>./docsget_started/quickstart.html</loc></url><url><loc>./docsguides/function_calling.html</loc></url><url><loc>./docsguides/observability/access_logging.html</loc></url><url><loc>./docsguides/observability/monitoring.html</loc></url><url><loc>./docsguides/observability/observability.html</loc></url><url><loc>./docsguides/observability/tracing.html</loc></url><url><loc>./docsguides/prompt_guard.html</loc></url><url><loc>./docsindex.html</loc></url><url><loc>./docsresources/configuration_reference.html</loc></url><url><loc>./docsresources/error_target.html</loc></url><url><loc>./docssearch.html</loc></url></urlset>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>./docsbuild_with_arch/agent.html</loc></url><url><loc>./docsbuild_with_arch/rag.html</loc></url><url><loc>./docsconcepts/llm_provider.html</loc></url><url><loc>./docsconcepts/prompt_target.html</loc></url><url><loc>./docsconcepts/tech_overview/error_target.html</loc></url><url><loc>./docsconcepts/tech_overview/listener.html</loc></url><url><loc>./docsconcepts/tech_overview/model_serving.html</loc></url><url><loc>./docsconcepts/tech_overview/prompt.html</loc></url><url><loc>./docsconcepts/tech_overview/request_lifecycle.html</loc></url><url><loc>./docsconcepts/tech_overview/tech_overview.html</loc></url><url><loc>./docsconcepts/tech_overview/terminology.html</loc></url><url><loc>./docsconcepts/tech_overview/threading_model.html</loc></url><url><loc>./docsget_started/intro_to_arch.html</loc></url><url><loc>./docsget_started/overview.html</loc></url><url><loc>./docsget_started/quickstart.html</loc></url><url><loc>./docsguides/function_calling.html</loc></url><url><loc>./docsguides/observability/access_logging.html</loc></url><url><loc>./docsguides/observability/monitoring.html</loc></url><url><loc>./docsguides/observability/observability.html</loc></url><url><loc>./docsguides/observability/tracing.html</loc></url><url><loc>./docsguides/prompt_guard.html</loc></url><url><loc>./docsindex.html</loc></url><url><loc>./docsresources/configuration_reference.html</loc></url><url><loc>./docssearch.html</loc></url></urlset>
|
||||
Loading…
Add table
Add a link
Reference in a new issue