@ -161,96 +161,106 @@ the Arch gateway. This where you enable capabilities like routing to upstream LL
where prompts get routed to, apply guardrails, and enable critical agent observability features.< / p >
< div class = "literal-block-wrapper docutils container" id = "id2" >
< div class = "code-block-caption" > < span class = "caption-text" > < a class = "reference download internal" download = "" href = "../_downloads/ca9d3b7116524473d8adbde7cf15d167/arch_config_full_reference.yaml" > < code class = "xref download docutils literal notranslate" > < span class = "pre" > Arch< / span > < span class = "pre" > Configuration< / span > < span class = "pre" > -< / span > < span class = "pre" > Full< / span > < span class = "pre" > Reference< / span > < / code > < / a > < / span > < a @ click . prevent = "window.navigator.clipboard.writeText($el.href); $el.setAttribute('data-tooltip', 'Copied!'); setTimeout(() => $el.setAttribute('data-tooltip', 'Copy link to this element'), 2000)" aria-label = "Copy link to this element" class = "headerlink" data-tooltip = "Copy link to this element" href = "#id2" > < svg height = "1em" viewbox = "0 0 24 24" width = "1em" xmlns = "http://www.w3.org/2000/svg" > < path d = "M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7c-2.76 0-5 2.24-5 5s2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1zM8 13h8v-2H8v2zm9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1s-1.39 3.1-3.1 3.1h-4V17h4c2.76 0 5-2.24 5-5s-2.24-5-5-5z" > < / path > < / svg > < / a > < / div >
< div class = "highlight-yaml notranslate" > < div class = "highlight" > < pre > < span > < / span > < code > < span id = "line-1" > < span class = "linenos" > 1< / span > < span class = "nt" > version< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > v0.1< / span >
< / span > < span id = "line-2" > < span class = "linenos" > 2< / span >
< / span > < span id = "line-3" > < span class = "linenos" > 3< / span > < span class = "nt" > listeners< / span > < span class = "p" > :< / span >
< / span > < span id = "line-4" > < span class = "linenos" > 4< / span > < span class = "w" > < / span > < span class = "nt" > ingress_traffic< / span > < span class = "p" > :< / span >
< / span > < span id = "line-5" > < span class = "linenos" > 5< / span > < span class = "w" > < / span > < span class = "nt" > address< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.0.0.0< / span >
< / span > < span id = "line-6" > < span class = "linenos" > 6< / span > < span class = "w" > < / span > < span class = "nt" > port< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 10000< / span >
< / span > < span id = "line-7" > < span class = "linenos" > 7< / span > < span class = "w" > < / span > < span class = "nt" > message_format< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > openai< / span >
< / span > < span id = "line-8" > < span class = "linenos" > 8< / span > < span class = "w" > < / span > < span class = "nt" > timeout< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 5s< / span >
< / span > < span id = "line-9" > < span class = "linenos" > 9< / span > < span class = "w" > < / span > < span class = "nt" > egress_traffic< / span > < span class = "p" > :< / span >
< / span > < span id = "line-10" > < span class = "linenos" > 10< / span > < span class = "w" > < / span > < span class = "nt" > address< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.0.0.0< / span >
< / span > < span id = "line-11" > < span class = "linenos" > 11< / span > < span class = "w" > < / span > < span class = "nt" > port< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 12000< / span >
< / span > < span id = "line-12" > < span class = "linenos" > 12< / span > < span class = "w" > < / span > < span class = "nt" > message_format< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > openai< / span >
< / span > < span id = "line-13" > < span class = "linenos" > 13< / span > < span class = "w" > < / span > < span class = "nt" > timeout< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 5s< / span >
< / span > < span id = "line-14" > < span class = "linenos" > 14< / span >
< / span > < span id = "line-15" > < span class = "linenos" > 15< / span > < span class = "c1" > # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.< / span >
< / span > < span id = "line-16" > < span class = "linenos" > 16< / span > < span class = "nt" > endpoints< / span > < span class = "p" > :< / span >
< / span > < span id = "line-17" > < span class = "linenos" > 17< / span > < span class = "w" > < / span > < span class = "nt" > app_server< / span > < span class = "p" > :< / span >
< / span > < span id = "line-18" > < span class = "linenos" > 18< / span > < span class = "w" > < / span > < span class = "c1" > # value could be ip address or a hostname with port< / span >
< / span > < span id = "line-19" > < span class = "linenos" > 19< / span > < span class = "w" > < / span > < span class = "c1" > # this could also be a list of endpoints for load balancing< / span >
< / span > < span id = "line-20" > < span class = "linenos" > 20< / span > < span class = "w" > < / span > < span class = "c1" > # for example endpoint: [ ip1:port, ip2:port ]< / span >
< / span > < span id = "line-21" > < span class = "linenos" > 21< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 127.0.0.1:80< / span >
< / span > < span id = "line-22" > < span class = "linenos" > 22< / span > < span class = "w" > < / span > < span class = "c1" > # max time to wait for a connection to be established< / span >
< / span > < span id = "line-23" > < span class = "linenos" > 23< / span > < span class = "w" > < / span > < span class = "nt" > connect_timeout< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.005s< / span >
< / span > < span id = "line-24" > < span class = "linenos" > 24< / span >
< / span > < span id = "line-25" > < span class = "linenos" > 25< / span > < span class = "w" > < / span > < span class = "nt" > mistral_local< / span > < span class = "p" > :< / span >
< / span > < span id = "line-26" > < span class = "linenos" > 26< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 127.0.0.1:8001< / span >
< / span > < span id = "line-27" > < span class = "linenos" > 27< / span >
< / span > < span id = "line-28" > < span class = "linenos" > 28< / span > < span class = "w" > < / span > < span class = "nt" > error_target< / span > < span class = "p" > :< / span >
< / span > < span id = "line-29" > < span class = "linenos" > 29< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > error_target_1< / span >
< / span > < span id = "line-30" > < span class = "linenos" > 30< / span >
< / span > < span id = "line-31" > < span class = "linenos" > 31< / span > < span class = "c1" > # Centralized way to manage LLMs, keys, retry logic, failover and limits< / span >
< / span > < span id = "line-32" > < span class = "linenos" > 32< / span > < span class = "nt" > llm_providers< / span > < span class = "p" > :< / span >
< / span > < span id = "line-33" > < span class = "linenos" > 33< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > openai/gpt-4o< / span >
< / span > < span id = "line-34" > < span class = "linenos" > 34< / span > < span class = "w" > < / span > < span class = "nt" > access_key< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > $OPENAI_API_KEY< / span >
< / span > < span id = "line-35" > < span class = "linenos" > 35< / span > < span class = "w" > < / span > < span class = "nt" > model< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > openai/gpt-4o< / span >
< / span > < span id = "line-36" > < span class = "linenos" > 36< / span > < span class = "w" > < / span > < span class = "nt" > default< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > true< / span >
< / span > < span id = "line-37" > < span class = "linenos" > 37< / span >
< / span > < span id = "line-38" > < span class = "linenos" > 38< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > access_key< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > $MISTRAL_API_KEY< / span >
< / span > < span id = "line-39" > < span class = "linenos" > 39< / span > < span class = "w" > < / span > < span class = "nt" > model< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > mistral/mistral-8x7b< / span >
< / span > < span id = "line-40" > < span class = "linenos" > 40< / span >
< / span > < span id = "line-41" > < span class = "linenos" > 41< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > model< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > mistral/mistral-7b-instruct< / span >
< / span > < span id = "line-42" > < span class = "linenos" > 42< / span > < span class = "w" > < / span > < span class = "nt" > base_url< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > http://mistral_local< / span >
< / span > < span id = "line-43" > < span class = "linenos" > 43< / span >
< / span > < span id = "line-44" > < span class = "linenos" > 44< / span > < span class = "c1" > # provides a way to override default settings for the arch system< / span >
< / span > < span id = "line-45" > < span class = "linenos" > 45< / span > < span class = "nt" > overrides< / span > < span class = "p" > :< / span >
< / span > < span id = "line-46" > < span class = "linenos" > 46< / span > < span class = "w" > < / span > < span class = "c1" > # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.< / span >
< / span > < span id = "line-47" > < span class = "linenos" > 47< / span > < span class = "w" > < / span > < span class = "c1" > # The intent matching threshold is kept at 0.80, you can override this behavior if you would like< / span >
< / span > < span id = "line-48" > < span class = "linenos" > 48< / span > < span class = "w" > < / span > < span class = "nt" > prompt_target_intent_matching_threshold< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.60< / span >
< / span > < span id = "line-49" > < span class = "linenos" > 49< / span >
< / span > < span id = "line-50" > < span class = "linenos" > 50< / span > < span class = "c1" > # default system prompt used by all prompt targets< / span >
< / span > < span id = "line-51" > < span class = "linenos" > 51< / span > < span class = "nt" > system_prompt< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.< / span >
< / span > < span id = "line-52" > < span class = "linenos" > 52< / span >
< / span > < span id = "line-53" > < span class = "linenos" > 53< / span > < span class = "nt" > prompt_guards< / span > < span class = "p" > :< / span >
< / span > < span id = "line-54" > < span class = "linenos" > 54< / span > < span class = "w" > < / span > < span class = "nt" > input_guards< / span > < span class = "p" > :< / span >
< / span > < span id = "line-55" > < span class = "linenos" > 55< / span > < span class = "w" > < / span > < span class = "nt" > jailbreak< / span > < span class = "p" > :< / span >
< / span > < span id = "line-56" > < span class = "linenos" > 56< / span > < span class = "w" > < / span > < span class = "nt" > on_exception< / span > < span class = "p" > :< / span >
< / span > < span id = "line-57" > < span class = "linenos" > 57< / span > < span class = "w" > < / span > < span class = "nt" > message< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.< / span >
< / span > < span id = "line-58" > < span class = "linenos" > 58< / span >
< / span > < span id = "line-59" > < span class = "linenos" > 59< / span > < span class = "nt" > prompt_targets< / span > < span class = "p" > :< / span >
< / span > < span id = "line-60" > < span class = "linenos" > 60< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > information_extraction< / span >
< / span > < span id = "line-61" > < span class = "linenos" > 61< / span > < span class = "w" > < / span > < span class = "nt" > default< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > true< / span >
< / span > < span id = "line-62" > < span class = "linenos" > 62< / span > < span class = "w" > < / span > < span class = "nt" > description< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > handle all scenarios that are question and answer in nature. Like summarization, information extraction, etc.< / span >
< / span > < span id = "line-63" > < span class = "linenos" > 63< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span >
< / span > < span id = "line-64" > < span class = "linenos" > 64< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > app_server< / span >
< / span > < span id = "line-65" > < span class = "linenos" > 65< / span > < span class = "w" > < / span > < span class = "nt" > path< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > /agent/summary< / span >
< / span > < span id = "line-66" > < span class = "linenos" > 66< / span > < span class = "w" > < / span > < span class = "nt" > http_method< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > POST< / span >
< / span > < span id = "line-67" > < span class = "linenos" > 67< / span > < span class = "w" > < / span > < span class = "c1" > # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM< / span >
< / span > < span id = "line-68" > < span class = "linenos" > 68< / span > < span class = "w" > < / span > < span class = "nt" > auto_llm_dispatch_on_response< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > true< / span >
< / span > < span id = "line-69" > < span class = "linenos" > 69< / span > < span class = "w" > < / span > < span class = "c1" > # override system prompt for this prompt target< / span >
< / span > < span id = "line-70" > < span class = "linenos" > 70< / span > < span class = "w" > < / span > < span class = "nt" > system_prompt< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > You are a helpful information extraction assistant. Use the information that is provided to you.< / span >
< / span > < span id = "line-71" > < span class = "linenos" > 71< / span >
< / span > < span id = "line-72" > < span class = "linenos" > 72< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > reboot_network_device< / span >
< / span > < span id = "line-73" > < span class = "linenos" > 73< / span > < span class = "w" > < / span > < span class = "nt" > description< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > Reboot a specific network device< / span >
< / span > < span id = "line-74" > < span class = "linenos" > 74< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span >
< / span > < span id = "line-75" > < span class = "linenos" > 75< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > app_server< / span >
< / span > < span id = "line-76" > < span class = "linenos" > 76< / span > < span class = "w" > < / span > < span class = "nt" > path< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > /agent/action< / span >
< / span > < span id = "line-77" > < span class = "linenos" > 77< / span > < span class = "w" > < / span > < span class = "nt" > parameters< / span > < span class = "p" > :< / span >
< / span > < span id = "line-78" > < span class = "linenos" > 78< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > device_id< / span >
< / span > < span id = "line-79" > < span class = "linenos" > 79< / span > < span class = "w" > < / span > < span class = "nt" > type< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > str< / span >
< / span > < span id = "line-80" > < span class = "linenos" > 80< / span > < span class = "w" > < / span > < span class = "nt" > description< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > Identifier of the network device to reboot.< / span >
< / span > < span id = "line-81" > < span class = "linenos" > 81< / span > < span class = "w" > < / span > < span class = "nt" > required< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > true< / span >
< / span > < span id = "line-82" > < span class = "linenos" > 82< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > confirmation< / span >
< / span > < span id = "line-83" > < span class = "linenos" > 83< / span > < span class = "w" > < / span > < span class = "nt" > type< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > bool< / span >
< / span > < span id = "line-84" > < span class = "linenos" > 84< / span > < span class = "w" > < / span > < span class = "nt" > description< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > Confirmation flag to proceed with reboot.< / span >
< / span > < span id = "line-85" > < span class = "linenos" > 85< / span > < span class = "w" > < / span > < span class = "nt" > default< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > false< / span >
< / span > < span id = "line-86" > < span class = "linenos" > 86< / span > < span class = "w" > < / span > < span class = "nt" > enum< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > [< / span > < span class = "nv" > true< / span > < span class = "p p-Indicator" > ,< / span > < span class = "w" > < / span > < span class = "nv" > false< / span > < span class = "p p-Indicator" > ]< / span >
< / span > < span id = "line-87" > < span class = "linenos" > 87< / span >
< / span > < span id = "line-88" > < span class = "linenos" > 88< / span > < span class = "nt" > tracing< / span > < span class = "p" > :< / span >
< / span > < span id = "line-89" > < span class = "linenos" > 89< / span > < span class = "w" > < / span > < span class = "c1" > # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.< / span >
< / span > < span id = "line-90" > < span class = "linenos" > 90< / span > < span class = "w" > < / span > < span class = "nt" > sampling_rate< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.1< / span >
< div class = "highlight-yaml notranslate" > < div class = "highlight" > < pre > < span > < / span > < code > < span id = "line-1" > < span class = "linenos" > 1< / span > < span class = "nt" > version< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > v0.1< / span >
< / span > < span id = "line-2" > < span class = "linenos" > 2< / span >
< / span > < span id = "line-3" > < span class = "linenos" > 3< / span > < span class = "nt" > listeners< / span > < span class = "p" > :< / span >
< / span > < span id = "line-4" > < span class = "linenos" > 4< / span > < span class = "w" > < / span > < span class = "nt" > ingress_traffic< / span > < span class = "p" > :< / span >
< / span > < span id = "line-5" > < span class = "linenos" > 5< / span > < span class = "w" > < / span > < span class = "nt" > address< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.0.0.0< / span >
< / span > < span id = "line-6" > < span class = "linenos" > 6< / span > < span class = "w" > < / span > < span class = "nt" > port< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 10000< / span >
< / span > < span id = "line-7" > < span class = "linenos" > 7< / span > < span class = "w" > < / span > < span class = "nt" > message_format< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > openai< / span >
< / span > < span id = "line-8" > < span class = "linenos" > 8< / span > < span class = "w" > < / span > < span class = "nt" > timeout< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 5s< / span >
< / span > < span id = "line-9" > < span class = "linenos" > 9< / span > < span class = "w" > < / span > < span class = "nt" > egress_traffic< / span > < span class = "p" > :< / span >
< / span > < span id = "line-10" > < span class = "linenos" > 10< / span > < span class = "w" > < / span > < span class = "nt" > address< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.0.0.0< / span >
< / span > < span id = "line-11" > < span class = "linenos" > 11< / span > < span class = "w" > < / span > < span class = "nt" > port< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 12000< / span >
< / span > < span id = "line-12" > < span class = "linenos" > 12< / span > < span class = "w" > < / span > < span class = "nt" > message_format< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > openai< / span >
< / span > < span id = "line-13" > < span class = "linenos" > 13< / span > < span class = "w" > < / span > < span class = "nt" > timeout< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 5s< / span >
< / span > < span id = "line-14" > < span class = "linenos" > 14< / span >
< / span > < span id = "line-15" > < span class = "linenos" > 15< / span > < span class = "c1" > # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.< / span >
< / span > < span id = "line-16" > < span class = "linenos" > 16< / span > < span class = "nt" > endpoints< / span > < span class = "p" > :< / span >
< / span > < span id = "line-17" > < span class = "linenos" > 17< / span > < span class = "w" > < / span > < span class = "nt" > app_server< / span > < span class = "p" > :< / span >
< / span > < span id = "line-18" > < span class = "linenos" > 18< / span > < span class = "w" > < / span > < span class = "c1" > # value could be ip address or a hostname with port< / span >
< / span > < span id = "line-19" > < span class = "linenos" > 19< / span > < span class = "w" > < / span > < span class = "c1" > # this could also be a list of endpoints for load balancing< / span >
< / span > < span id = "line-20" > < span class = "linenos" > 20< / span > < span class = "w" > < / span > < span class = "c1" > # for example endpoint: [ ip1:port, ip2:port ]< / span >
< / span > < span id = "line-21" > < span class = "linenos" > 21< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 127.0.0.1:80< / span >
< / span > < span id = "line-22" > < span class = "linenos" > 22< / span > < span class = "w" > < / span > < span class = "c1" > # max time to wait for a connection to be established< / span >
< / span > < span id = "line-23" > < span class = "linenos" > 23< / span > < span class = "w" > < / span > < span class = "nt" > connect_timeout< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.005s< / span >
< / span > < span id = "line-24" > < span class = "linenos" > 24< / span >
< / span > < span id = "line-25" > < span class = "linenos" > 25< / span > < span class = "w" > < / span > < span class = "nt" > mistral_local< / span > < span class = "p" > :< / span >
< / span > < span id = "line-26" > < span class = "linenos" > 26< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 127.0.0.1:8001< / span >
< / span > < span id = "line-27" > < span class = "linenos" > 27< / span >
< / span > < span id = "line-28" > < span class = "linenos" > 28< / span > < span class = "w" > < / span > < span class = "nt" > error_target< / span > < span class = "p" > :< / span >
< / span > < span id = "line-29" > < span class = "linenos" > 29< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > error_target_1< / span >
< / span > < span id = "line-30" > < span class = "linenos" > 30< / span >
< / span > < span id = "line-31" > < span class = "linenos" > 31< / span > < span class = "c1" > # Centralized way to manage LLMs, keys, retry logic, failover and limits< / span >
< / span > < span id = "line-32" > < span class = "linenos" > 32< / span > < span class = "nt" > llm_providers< / span > < span class = "p" > :< / span >
< / span > < span id = "line-33" > < span class = "linenos" > 33< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > openai/gpt-4o< / span >
< / span > < span id = "line-34" > < span class = "linenos" > 34< / span > < span class = "w" > < / span > < span class = "nt" > access_key< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > $OPENAI_API_KEY< / span >
< / span > < span id = "line-35" > < span class = "linenos" > 35< / span > < span class = "w" > < / span > < span class = "nt" > model< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > openai/gpt-4o< / span >
< / span > < span id = "line-36" > < span class = "linenos" > 36< / span > < span class = "w" > < / span > < span class = "nt" > default< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > true< / span >
< / span > < span id = "line-37" > < span class = "linenos" > 37< / span >
< / span > < span id = "line-38" > < span class = "linenos" > 38< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > access_key< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > $MISTRAL_API_KEY< / span >
< / span > < span id = "line-39" > < span class = "linenos" > 39< / span > < span class = "w" > < / span > < span class = "nt" > model< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > mistral/mistral-8x7b< / span >
< / span > < span id = "line-40" > < span class = "linenos" > 40< / span >
< / span > < span id = "line-41" > < span class = "linenos" > 41< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > model< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > mistral/mistral-7b-instruct< / span >
< / span > < span id = "line-42" > < span class = "linenos" > 42< / span > < span class = "w" > < / span > < span class = "nt" > base_url< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > http://mistral_local< / span >
< / span > < span id = "line-43" > < span class = "linenos" > 43< / span >
< / span > < span id = "line-44" > < span class = "linenos" > 44< / span > < span class = "c1" > # Model aliases - friendly names that map to actual provider names< / span >
< / span > < span id = "line-45" > < span class = "linenos" > 45< / span > < span class = "nt" > model_aliases< / span > < span class = "p" > :< / span >
< / span > < span id = "line-46" > < span class = "linenos" > 46< / span > < span class = "w" > < / span > < span class = "c1" > # Alias for summarization tasks -> fast/cheap model< / span >
< / span > < span id = "line-47" > < span class = "linenos" > 47< / span > < span class = "w" > < / span > < span class = "nt" > arch.summarize.v1< / span > < span class = "p" > :< / span >
< / span > < span id = "line-48" > < span class = "linenos" > 48< / span > < span class = "w" > < / span > < span class = "nt" > target< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > gpt-4o< / span >
< / span > < span id = "line-49" > < span class = "linenos" > 49< / span >
< / span > < span id = "line-50" > < span class = "linenos" > 50< / span > < span class = "w" > < / span > < span class = "c1" > # Alias for general purpose tasks -> latest model< / span >
< / span > < span id = "line-51" > < span class = "linenos" > 51< / span > < span class = "w" > < / span > < span class = "nt" > arch.v1< / span > < span class = "p" > :< / span >
< / span > < span id = "line-52" > < span class = "linenos" > 52< / span > < span class = "w" > < / span > < span class = "nt" > target< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > mistral-8x7b< / span >
< / span > < span id = "line-53" > < span class = "linenos" > 53< / span >
< / span > < span id = "line-54" > < span class = "linenos" > 54< / span > < span class = "c1" > # provides a way to override default settings for the arch system< / span >
< / span > < span id = "line-55" > < span class = "linenos" > 55< / span > < span class = "nt" > overrides< / span > < span class = "p" > :< / span >
< / span > < span id = "line-56" > < span class = "linenos" > 56< / span > < span class = "w" > < / span > < span class = "c1" > # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.< / span >
< / span > < span id = "line-57" > < span class = "linenos" > 57< / span > < span class = "w" > < / span > < span class = "c1" > # The intent matching threshold is kept at 0.80; you can override this behavior if you would like< / span >
< / span > < span id = "line-58" > < span class = "linenos" > 58< / span > < span class = "w" > < / span > < span class = "nt" > prompt_target_intent_matching_threshold< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.60< / span >
< / span > < span id = "line-59" > < span class = "linenos" > 59< / span >
< / span > < span id = "line-60" > < span class = "linenos" > 60< / span > < span class = "c1" > # default system prompt used by all prompt targets< / span >
< / span > < span id = "line-61" > < span class = "linenos" > 61< / span > < span class = "nt" > system_prompt< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.< / span >
< / span > < span id = "line-62" > < span class = "linenos" > 62< / span >
< / span > < span id = "line-63" > < span class = "linenos" > 63< / span > < span class = "nt" > prompt_guards< / span > < span class = "p" > :< / span >
< / span > < span id = "line-64" > < span class = "linenos" > 64< / span > < span class = "w" > < / span > < span class = "nt" > input_guards< / span > < span class = "p" > :< / span >
< / span > < span id = "line-65" > < span class = "linenos" > 65< / span > < span class = "w" > < / span > < span class = "nt" > jailbreak< / span > < span class = "p" > :< / span >
< / span > < span id = "line-66" > < span class = "linenos" > 66< / span > < span class = "w" > < / span > < span class = "nt" > on_exception< / span > < span class = "p" > :< / span >
< / span > < span id = "line-67" > < span class = "linenos" > 67< / span > < span class = "w" > < / span > < span class = "nt" > message< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.< / span >
< / span > < span id = "line-68" > < span class = "linenos" > 68< / span >
< / span > < span id = "line-69" > < span class = "linenos" > 69< / span > < span class = "nt" > prompt_targets< / span > < span class = "p" > :< / span >
< / span > < span id = "line-70" > < span class = "linenos" > 70< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > information_extraction< / span >
< / span > < span id = "line-71" > < span class = "linenos" > 71< / span > < span class = "w" > < / span > < span class = "nt" > default< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > true< / span >
< / span > < span id = "line-72" > < span class = "linenos" > 72< / span > < span class = "w" > < / span > < span class = "nt" > description< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > handle all scenarios that are question and answer in nature. Like summarization, information extraction, etc.< / span >
< / span > < span id = "line-73" > < span class = "linenos" > 73< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span >
< / span > < span id = "line-74" > < span class = "linenos" > 74< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > app_server< / span >
< / span > < span id = "line-75" > < span class = "linenos" > 75< / span > < span class = "w" > < / span > < span class = "nt" > path< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > /agent/summary< / span >
< / span > < span id = "line-76" > < span class = "linenos" > 76< / span > < span class = "w" > < / span > < span class = "nt" > http_method< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > POST< / span >
< / span > < span id = "line-77" > < span class = "linenos" > 77< / span > < span class = "w" > < / span > < span class = "c1" > # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM< / span >
< / span > < span id = "line-78" > < span class = "linenos" > 78< / span > < span class = "w" > < / span > < span class = "nt" > auto_llm_dispatch_on_response< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > true< / span >
< / span > < span id = "line-79" > < span class = "linenos" > 79< / span > < span class = "w" > < / span > < span class = "c1" > # override system prompt for this prompt target< / span >
< / span > < span id = "line-80" > < span class = "linenos" > 80< / span > < span class = "w" > < / span > < span class = "nt" > system_prompt< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > You are a helpful information extraction assistant. Use the information that is provided to you.< / span >
< / span > < span id = "line-81" > < span class = "linenos" > 81< / span >
< / span > < span id = "line-82" > < span class = "linenos" > 82< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > reboot_network_device< / span >
< / span > < span id = "line-83" > < span class = "linenos" > 83< / span > < span class = "w" > < / span > < span class = "nt" > description< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > Reboot a specific network device< / span >
< / span > < span id = "line-84" > < span class = "linenos" > 84< / span > < span class = "w" > < / span > < span class = "nt" > endpoint< / span > < span class = "p" > :< / span >
< / span > < span id = "line-85" > < span class = "linenos" > 85< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > app_server< / span >
< / span > < span id = "line-86" > < span class = "linenos" > 86< / span > < span class = "w" > < / span > < span class = "nt" > path< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > /agent/action< / span >
< / span > < span id = "line-87" > < span class = "linenos" > 87< / span > < span class = "w" > < / span > < span class = "nt" > parameters< / span > < span class = "p" > :< / span >
< / span > < span id = "line-88" > < span class = "linenos" > 88< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > device_id< / span >
< / span > < span id = "line-89" > < span class = "linenos" > 89< / span > < span class = "w" > < / span > < span class = "nt" > type< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > str< / span >
< / span > < span id = "line-90" > < span class = "linenos" > 90< / span > < span class = "w" > < / span > < span class = "nt" > description< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > Identifier of the network device to reboot.< / span >
< / span > < span id = "line-91" > < span class = "linenos" > 91< / span > < span class = "w" > < / span > < span class = "nt" > required< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > true< / span >
< / span > < span id = "line-92" > < span class = "linenos" > 92< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > -< / span > < span class = "w" > < / span > < span class = "nt" > name< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > confirmation< / span >
< / span > < span id = "line-93" > < span class = "linenos" > 93< / span > < span class = "w" > < / span > < span class = "nt" > type< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > bool< / span >
< / span > < span id = "line-94" > < span class = "linenos" > 94< / span > < span class = "w" > < / span > < span class = "nt" > description< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > Confirmation flag to proceed with reboot.< / span >
< / span > < span id = "line-95" > < span class = "linenos" > 95< / span > < span class = "w" > < / span > < span class = "nt" > default< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > false< / span >
< / span > < span id = "line-96" > < span class = "linenos" > 96< / span > < span class = "w" > < / span > < span class = "nt" > enum< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "p p-Indicator" > [< / span > < span class = "nv" > true< / span > < span class = "p p-Indicator" > ,< / span > < span class = "w" > < / span > < span class = "nv" > false< / span > < span class = "p p-Indicator" > ]< / span >
< / span > < span id = "line-97" > < span class = "linenos" > 97< / span >
< / span > < span id = "line-98" > < span class = "linenos" > 98< / span > < span class = "nt" > tracing< / span > < span class = "p" > :< / span >
< / span > < span id = "line-99" > < span class = "linenos" > 99< / span > < span class = "w" > < / span > < span class = "c1" > # sampling rate. Note: by default, Arch works with OpenTelemetry-compatible tracing.< / span >
< / span > < span id = "line-100" > < span class = "linenos" > 100< / span > < span class = "w" > < / span > < span class = "nt" > sampling_rate< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "l l-Scalar l-Scalar-Plain" > 0.1< / span >
< / span > < / code > < / pre > < / div >
< / div >
< / div >
@ -270,7 +280,7 @@ where prompts get routed to, apply guardrails, and enable critical agent observa
< / div > < footer class = "py-6 border-t border-border md:py-0" >
< div class = "container flex flex-col items-center justify-between gap-4 md:h-24 md:flex-row" >
< div class = "flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0" >
< p class = "text-sm leading-loose text-center text-muted-foreground md:text-left" > © 2025, Katanemo Labs, Inc Last updated: Sep 17 , 2025. < / p >
< p class = "text-sm leading-loose text-center text-muted-foreground md:text-left" > © 2025, Katanemo Labs, Inc Last updated: Sep 19 , 2025. < / p >
< / div >
< / div >
< / footer >