mirror of
https://github.com/katanemo/plano.git
synced 2026-05-15 11:02:39 +02:00
707 lines
44 KiB
HTML
707 lines
44 KiB
HTML
|
|
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<html lang="en" data-content_root="../../../" >
|
|||
|
|
|
|||
|
|
<head>
|
|||
|
|
<meta charset="utf-8" />
|
|||
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|||
|
|
|
|||
|
|
<title>Prompts — Arch 0.1-beta documentation</title>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script data-cfasync="false">
|
|||
|
|
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
|||
|
|
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
|||
|
|
</script>
|
|||
|
|
|
|||
|
|
<!-- Loaded before other Sphinx assets -->
|
|||
|
|
<link href="../../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
|||
|
|
<link href="../../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
|||
|
|
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link href="../../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
|||
|
|
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
|||
|
|
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
|||
|
|
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
|||
|
|
|
|||
|
|
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=a746c00c" />
|
|||
|
|
<link rel="stylesheet" type="text/css" href="../../../_static/styles/sphinx-book-theme.css?v=a3416100" />
|
|||
|
|
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
|
|||
|
|
|
|||
|
|
<!-- Pre-loaded scripts that we'll load fully later -->
|
|||
|
|
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
|||
|
|
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
|||
|
|
<script src="../../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
|||
|
|
|
|||
|
|
<script src="../../../_static/documentation_options.js?v=2742c0eb"></script>
|
|||
|
|
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|||
|
|
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|||
|
|
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>
|
|||
|
|
<script src="../../../_static/copybutton.js?v=f281be69"></script>
|
|||
|
|
<script src="../../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
|||
|
|
<script>DOCUMENTATION_OPTIONS.pagename = 'intro/architecture/prompt_processing/prompt_processing';</script>
|
|||
|
|
<link rel="icon" href="../../../_static/favicon.ico"/>
|
|||
|
|
<link rel="index" title="Index" href="../../../genindex.html" />
|
|||
|
|
<link rel="search" title="Search" href="../../../search.html" />
|
|||
|
|
<link rel="next" title="LLM Provider" href="../listeners/llm_provider.html" />
|
|||
|
|
<link rel="prev" title="Listener" href="../listeners/listeners.html" />
|
|||
|
|
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
|||
|
|
<meta name="docsearch:language" content="en"/>
|
|||
|
|
</head>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
|||
|
|
|
|||
|
|
<div id="pst-scroll-pixel-helper"></div>
|
|||
|
|
|
|||
|
|
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
|||
|
|
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input type="checkbox"
|
|||
|
|
class="sidebar-toggle"
|
|||
|
|
id="pst-primary-sidebar-checkbox"/>
|
|||
|
|
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
|||
|
|
|
|||
|
|
<input type="checkbox"
|
|||
|
|
class="sidebar-toggle"
|
|||
|
|
id="pst-secondary-sidebar-checkbox"/>
|
|||
|
|
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
|||
|
|
|
|||
|
|
<div class="search-button__wrapper">
|
|||
|
|
<div class="search-button__overlay"></div>
|
|||
|
|
<div class="search-button__search-container">
|
|||
|
|
<form class="bd-search d-flex align-items-center"
|
|||
|
|
action="../../../search.html"
|
|||
|
|
method="get">
|
|||
|
|
<i class="fa-solid fa-magnifying-glass"></i>
|
|||
|
|
<input type="search"
|
|||
|
|
class="form-control"
|
|||
|
|
name="q"
|
|||
|
|
id="search-input"
|
|||
|
|
placeholder="Search..."
|
|||
|
|
aria-label="Search..."
|
|||
|
|
autocomplete="off"
|
|||
|
|
autocorrect="off"
|
|||
|
|
autocapitalize="off"
|
|||
|
|
spellcheck="false"/>
|
|||
|
|
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
|||
|
|
</form></div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<div class="pst-async-banner-revealer d-none">
|
|||
|
|
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
|||
|
|
</header>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="bd-container">
|
|||
|
|
<div class="bd-container__inner bd-page-width">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="bd-sidebar-primary bd-sidebar">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="sidebar-header-items sidebar-primary__section">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<div class="sidebar-primary-items__start sidebar-primary__section">
|
|||
|
|
<div class="sidebar-primary-item">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<a class="navbar-brand logo" href="../../../root.html">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<img src="../../../_static/arch-nav-logo.png" class="logo__image only-light" alt="Arch 0.1-beta documentation - Home"/>
|
|||
|
|
<script>document.write(`<img src="../../../_static/arch-nav-logo.png" class="logo__image only-dark" alt="Arch 0.1-beta documentation - Home"/>`);</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a></div>
|
|||
|
|
<div class="sidebar-primary-item">
|
|||
|
|
|
|||
|
|
<script>
|
|||
|
|
document.write(`
|
|||
|
|
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
|||
|
|
<i class="fa-solid fa-magnifying-glass"></i>
|
|||
|
|
<span class="search-button__default-text">Search</span>
|
|||
|
|
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
|||
|
|
</button>
|
|||
|
|
`);
|
|||
|
|
</script></div>
|
|||
|
|
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
|||
|
|
<div class="bd-toc-item navbar-nav active">
|
|||
|
|
<ul class="current nav bd-sidenav">
|
|||
|
|
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../../intro.html">Introduction</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
|||
|
|
<li class="toctree-l2"><a class="reference internal" href="../../what_is_arch.html">What is Arch</a></li>
|
|||
|
|
<li class="toctree-l2 current active has-children"><a class="reference internal" href="../architecture.html">Technical Architecture</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
|
|||
|
|
<li class="toctree-l3"><a class="reference internal" href="../intro/terminology.html">Terminology</a></li>
|
|||
|
|
<li class="toctree-l3"><a class="reference internal" href="../intro/threading_model.html">Threading model</a></li>
|
|||
|
|
<li class="toctree-l3"><a class="reference internal" href="../listeners/listeners.html">Listener</a></li>
|
|||
|
|
<li class="toctree-l3 current active"><a class="current reference internal" href="#">Prompts</a></li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="toctree-l3"><a class="reference internal" href="../listeners/llm_provider.html">LLM Provider</a></li>
|
|||
|
|
|
|||
|
|
<li class="toctree-l3"><a class="reference internal" href="../model_serving/model_serving.html">Model Serving</a></li>
|
|||
|
|
</ul>
|
|||
|
|
</details></li>
|
|||
|
|
<li class="toctree-l2"><a class="reference internal" href="../../life_of_a_request.html">Life of a Request</a></li>
|
|||
|
|
<li class="toctree-l2"><a class="reference internal" href="../../getting_help.html">Getting help</a></li>
|
|||
|
|
</ul>
|
|||
|
|
</details></li>
|
|||
|
|
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/getting_started.html">Getting Started</a></li>
|
|||
|
|
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/rag.html">Retrieval-Augmented (RAG)</a></li>
|
|||
|
|
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/use_cases/function_calling.html">Agentic (Text-to-Action) Apps</a></li>
|
|||
|
|
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../observability/observability.html">Observability</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
|
|||
|
|
<li class="toctree-l2"><a class="reference internal" href="../../../observability/tracing.html">Tracing</a></li>
|
|||
|
|
<li class="toctree-l2"><a class="reference internal" href="../../../observability/stats.html">Monitoring</a></li>
|
|||
|
|
<li class="toctree-l2"><a class="reference internal" href="../../../observability/access_logs.html">Access Logging</a></li>
|
|||
|
|
</ul>
|
|||
|
|
</details></li>
|
|||
|
|
<li class="toctree-l1"><a class="reference internal" href="../../../llms/llms.html">LLMs</a></li>
|
|||
|
|
<li class="toctree-l1"><a class="reference internal" href="../../../configuration_reference.html">Configuration Reference</a></li>
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
</nav></div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="sidebar-primary-items__end sidebar-primary__section">
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<div id="rtd-footer-container"></div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<main id="main-content" class="bd-main" role="main">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="sbt-scroll-pixel-helper"></div>
|
|||
|
|
|
|||
|
|
<div class="bd-content">
|
|||
|
|
<div class="bd-article-container">
|
|||
|
|
|
|||
|
|
<div class="bd-header-article d-print-none">
|
|||
|
|
<div class="header-article-items header-article__inner">
|
|||
|
|
|
|||
|
|
<div class="header-article-items__start">
|
|||
|
|
|
|||
|
|
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
|||
|
|
<span class="fa-solid fa-bars"></span>
|
|||
|
|
</button></div>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="header-article-items__end">
|
|||
|
|
|
|||
|
|
<div class="header-article-item">
|
|||
|
|
|
|||
|
|
<div class="article-header-buttons">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="dropdown dropdown-download-buttons">
|
|||
|
|
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
|||
|
|
<i class="fas fa-download"></i>
|
|||
|
|
</button>
|
|||
|
|
<ul class="dropdown-menu">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li><a href="../../../_sources/intro/architecture/prompt_processing/prompt_processing.rst" target="_blank"
|
|||
|
|
class="btn btn-sm btn-download-source-button dropdown-item"
|
|||
|
|
title="Download source file"
|
|||
|
|
data-bs-placement="left" data-bs-toggle="tooltip"
|
|||
|
|
>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="btn__icon-container">
|
|||
|
|
<i class="fas fa-file"></i>
|
|||
|
|
</span>
|
|||
|
|
<span class="btn__text-container">.rst</span>
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li>
|
|||
|
|
<button onclick="window.print()"
|
|||
|
|
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
|||
|
|
title="Print to PDF"
|
|||
|
|
data-bs-placement="left" data-bs-toggle="tooltip"
|
|||
|
|
>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="btn__icon-container">
|
|||
|
|
<i class="fas fa-file-pdf"></i>
|
|||
|
|
</span>
|
|||
|
|
<span class="btn__text-container">.pdf</span>
|
|||
|
|
</button>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<button onclick="toggleFullScreen()"
|
|||
|
|
class="btn btn-sm btn-fullscreen-button"
|
|||
|
|
title="Fullscreen mode"
|
|||
|
|
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
|||
|
|
>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="btn__icon-container">
|
|||
|
|
<i class="fas fa-expand"></i>
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
</button>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>
|
|||
|
|
document.write(`
|
|||
|
|
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
|||
|
|
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
|||
|
|
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
|||
|
|
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
|||
|
|
</button>
|
|||
|
|
`);
|
|||
|
|
</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>
|
|||
|
|
document.write(`
|
|||
|
|
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
|||
|
|
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
|||
|
|
</button>
|
|||
|
|
`);
|
|||
|
|
</script>
|
|||
|
|
<button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
|||
|
|
<span class="fa-solid fa-list"></span>
|
|||
|
|
</button>
|
|||
|
|
</div></div>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div id="jb-print-docs-body" class="onlyprint">
|
|||
|
|
<h1>Prompts</h1>
|
|||
|
|
<!-- Table of contents -->
|
|||
|
|
<div id="print-main-content">
|
|||
|
|
<div id="jb-print-toc">
|
|||
|
|
|
|||
|
|
<div>
|
|||
|
|
<h2> Contents </h2>
|
|||
|
|
</div>
|
|||
|
|
<nav aria-label="Page">
|
|||
|
|
<ul class="visible nav section-nav flex-column">
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Prompts</a></li>
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#messages">Messages</a></li>
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompt-guardrails">Prompt Guardrails</a></li>
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompt-targets">Prompt Targets</a><ul class="visible nav section-nav flex-column">
|
|||
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#intent-detection-and-prompt-matching">Intent Detection and Prompt Matching:</a></li>
|
|||
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#agentic-apps-via-prompt-targets">Agentic Apps via Prompt Targets</a></li>
|
|||
|
|
</ul>
|
|||
|
|
</li>
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompting-llms">Prompting LLMs</a><ul class="visible nav section-nav flex-column">
|
|||
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-the-arch-python-sdk">Example: Using the Arch Python SDK</a></li>
|
|||
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-openai-client-with-arch-as-an-egress-gateway">Example: Using OpenAI Client with Arch as an Egress Gateway</a></li>
|
|||
|
|
</ul>
|
|||
|
|
</li>
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div id="searchbox"></div>
|
|||
|
|
<article class="bd-article">
|
|||
|
|
|
|||
|
|
<section id="prompts">
|
|||
|
|
<span id="arch-overview-prompt-handling"></span><h1>Prompts<a class="headerlink" href="#prompts" title="Link to this heading">#</a></h1>
|
|||
|
|
<p>Arch’s primary design point is to securely accept, process and handle prompts. To do that effectively,
|
|||
|
|
Arch relies on Envoy’s HTTP <a class="reference external" href="https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/http/http_connection_management">connection management</a>
|
|||
|
|
subsystem and its <strong>prompt handler</strong> subsystem engineered with purpose-built <a class="reference internal" href="../../../llms/llms.html#llms-in-arch"><span class="std std-ref">LLMs</span></a> to
|
|||
|
|
implement critical functionality on behalf of developers so that you can stay focused on business logic.</p>
|
|||
|
|
<div class="admonition note">
|
|||
|
|
<p class="admonition-title">Note</p>
|
|||
|
|
<p>Arch’s <strong>prompt handler</strong> subsystem interacts with the <strong>model</strong> subsystem through Envoy’s cluster manager
|
|||
|
|
system to ensure a robust, resilient and fault-tolerant experience in managing incoming prompts. Read more
|
|||
|
|
about the <a class="reference internal" href="../model_serving/model_serving.html#arch-model-serving"><span class="std std-ref">model subsystem</span></a> and how the LLMs are hosted in Arch.</p>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
<section id="messages">
|
|||
|
|
<h1>Messages<a class="headerlink" href="#messages" title="Link to this heading">#</a></h1>
|
|||
|
|
<p>Arch accepts messages directly from the body of the HTTP request in a format that follows the <a class="reference external" href="https://huggingface.co/docs/text-generation-inference/en/messages_api">Hugging Face Messages API</a>.
|
|||
|
|
This design allows developers to pass a list of messages, where each message is represented as a dictionary
|
|||
|
|
containing two key-value pairs:</p>
|
|||
|
|
<blockquote>
|
|||
|
|
<div><ul class="simple">
|
|||
|
|
<li><p><strong>Role</strong>: Defines the role of the message sender, such as “user” or “assistant”.</p></li>
|
|||
|
|
<li><p><strong>Content</strong>: Contains the actual text of the message.</p></li>
|
|||
|
|
</ul>
|
|||
|
|
</div></blockquote>
|
|||
|
|
</section>
|
|||
|
|
<section id="prompt-guardrails">
|
|||
|
|
<h1>Prompt Guardrails<a class="headerlink" href="#prompt-guardrails" title="Link to this heading">#</a></h1>
|
|||
|
|
<p>Arch is engineered with <a class="reference internal" href="../../../llms/llms.html#llms-in-arch"><span class="std std-ref">Arch-Guard</span></a>, an industry-leading safety layer, powered by a
|
|||
|
|
compact and high-performing LLM that monitors incoming prompts to detect and reject jailbreak attempts -
|
|||
|
|
ensuring that unauthorized or harmful behaviors are intercepted early in the process.</p>
|
|||
|
|
<p>To add jailbreak guardrails, see example below:</p>
|
|||
|
|
<div class="literal-block-wrapper docutils container" id="id1">
|
|||
|
|
<div class="code-block-caption"><span class="caption-text"><a class="reference download internal" download="" href="../../../_downloads/a0b0970c8fa22d4fe861e8362965b77c/getting-started.yml"><code class="xref download docutils literal notranslate"><span class="pre">arch-getting-started.yml</span></code></a></span><a class="headerlink" href="#id1" title="Link to this code">#</a></div>
|
|||
|
|
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
|||
|
|
<span class="linenos"> 2</span><span class="nt">listener</span><span class="p">:</span>
|
|||
|
|
<span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
|||
|
|
<span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
|||
|
|
<span class="linenos"> 5</span><span class="w"> </span><span class="nt">messages</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">tuple | hugging-face-messages-api</span>
|
|||
|
|
<span class="linenos"> 6</span>
|
|||
|
|
<span class="linenos"> 7</span><span class="nt">system_prompts</span><span class="p">:</span>
|
|||
|
|
<span class="linenos"> 8</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">network_assistant</span>
|
|||
|
|
<span class="linenos"> 9</span><span class="w"> </span><span class="nt">content</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts about the operational health of the network</span>
|
|||
|
|
<span class="linenos">10</span>
|
|||
|
|
<span class="linenos">11</span><span class="nt">llm_providers</span><span class="p">:</span>
|
|||
|
|
<span class="linenos">12</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
|||
|
|
<span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPEN_AI_KEY</span>
|
|||
|
|
<span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
|||
|
|
<span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
|||
|
|
<span class="linenos">16</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral"</span>
|
|||
|
|
<span class="linenos">17</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_KEY</span>
|
|||
|
|
<span class="linenos">18</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mixtral8-7B</span>
|
|||
|
|
<span class="linenos">19</span>
|
|||
|
|
<span class="linenos">20</span><span class="nt">prompt_endpoints</span><span class="p">:</span>
|
|||
|
|
<span class="linenos">21</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.2"</span>
|
|||
|
|
<span class="linenos">22</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.1"</span>
|
|||
|
|
<span class="linenos">23</span>
|
|||
|
|
<span class="hll"><span class="linenos">24</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">25</span><span class="w"> </span><span class="nt">input-guard</span><span class="p">:</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">26</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="c1">#jailbreak</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">27</span><span class="w"> </span><span class="nt">on-exception-message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you are curious about my abilities. But I can only</span>
|
|||
|
|
</span><span class="linenos">28</span>
|
|||
|
|
<span class="linenos">29</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
|||
|
|
<span class="linenos">30</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
|
|||
|
|
<span class="linenos">31</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">RAG</span>
|
|||
|
|
<span class="linenos">32</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">this prompt handles all information extractions scenarios</span>
|
|||
|
|
<span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
|
|||
|
|
<span class="linenos">34</span>
|
|||
|
|
<span class="linenos">35</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
|
|||
|
|
<span class="linenos">36</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
|
|||
|
|
<span class="linenos">37</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">used to help network operators with perform device operations like rebooting a device.</span>
|
|||
|
|
<span class="linenos">38</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
|||
|
|
<span class="linenos">39</span><span class="nt">error_target</span><span class="p">:</span><span class="w"> </span><span class="c1">#handle errors from Bolt or upstream LLMs</span>
|
|||
|
|
<span class="linenos">40</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">“error_handler”</span>
|
|||
|
|
<span class="linenos">41</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/errors</span>
|
|||
|
|
</pre></div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="admonition note">
|
|||
|
|
<p class="admonition-title">Note</p>
|
|||
|
|
<p>As a roadmap item, Arch will expose the ability for developers to define custom guardrails via Arch-Guard-v2,
|
|||
|
|
and add support for additional safety checks defined by developers and hazardous categories like violent crimes, privacy, hate,
|
|||
|
|
etc. To offer feedback on our roadmap, please visit our <a class="reference external" href="https://github.com/orgs/katanemo/projects/1">GitHub page</a>.</p>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
<section id="prompt-targets">
|
|||
|
|
<h1>Prompt Targets<a class="headerlink" href="#prompt-targets" title="Link to this heading">#</a></h1>
|
|||
|
|
<p>Once a prompt passes any configured guardrail checks, Arch processes the contents of the incoming conversation
|
|||
|
|
and identifies where to forward the conversation to via its essential <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> primitive. Prompt targets
|
|||
|
|
are endpoints that receive prompts that are processed by Arch. For example, Arch enriches incoming prompts with
|
|||
|
|
metadata like knowing when a user’s intent has changed so that you can build faster, more accurate RAG apps.</p>
|
|||
|
|
<p>Configuring <code class="docutils literal notranslate"><span class="pre">prompt_targets</span></code> is simple. See example below:</p>
|
|||
|
|
<div class="literal-block-wrapper docutils container" id="id2">
|
|||
|
|
<div class="code-block-caption"><span class="caption-text"><a class="reference download internal" download="" href="../../../_downloads/a0b0970c8fa22d4fe861e8362965b77c/getting-started.yml"><code class="xref download docutils literal notranslate"><span class="pre">arch-getting-started.yml</span></code></a></span><a class="headerlink" href="#id2" title="Link to this code">#</a></div>
|
|||
|
|
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 1</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="s">"0.1-beta"</span>
|
|||
|
|
<span class="linenos"> 2</span><span class="nt">listener</span><span class="p">:</span>
|
|||
|
|
<span class="linenos"> 3</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1 | 0.0.0.0</span>
|
|||
|
|
<span class="linenos"> 4</span><span class="w"> </span><span class="nt">port_value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1">#If you configure port 443, you'll need to update the listener with tls_certificates</span>
|
|||
|
|
<span class="linenos"> 5</span><span class="w"> </span><span class="nt">messages</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">tuple | hugging-face-messages-api</span>
|
|||
|
|
<span class="linenos"> 6</span>
|
|||
|
|
<span class="linenos"> 7</span><span class="nt">system_prompts</span><span class="p">:</span>
|
|||
|
|
<span class="linenos"> 8</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">network_assistant</span>
|
|||
|
|
<span class="linenos"> 9</span><span class="w"> </span><span class="nt">content</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">You are a network assistant that just offers facts about the operational health of the network</span>
|
|||
|
|
<span class="linenos">10</span>
|
|||
|
|
<span class="linenos">11</span><span class="nt">llm_providers</span><span class="p">:</span>
|
|||
|
|
<span class="linenos">12</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"OpenAI"</span>
|
|||
|
|
<span class="linenos">13</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPEN_AI_KEY</span>
|
|||
|
|
<span class="linenos">14</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
|
|||
|
|
<span class="linenos">15</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
|
|||
|
|
<span class="linenos">16</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"Mistral"</span>
|
|||
|
|
<span class="linenos">17</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_KEY</span>
|
|||
|
|
<span class="linenos">18</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mixtral8-7B</span>
|
|||
|
|
<span class="linenos">19</span>
|
|||
|
|
<span class="linenos">20</span><span class="nt">prompt_endpoints</span><span class="p">:</span>
|
|||
|
|
<span class="linenos">21</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.2"</span>
|
|||
|
|
<span class="linenos">22</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://127.0.0.1"</span>
|
|||
|
|
<span class="linenos">23</span>
|
|||
|
|
<span class="linenos">24</span><span class="nt">prompt_guards</span><span class="p">:</span>
|
|||
|
|
<span class="linenos">25</span><span class="w"> </span><span class="nt">input-guard</span><span class="p">:</span>
|
|||
|
|
<span class="linenos">26</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="c1">#jailbreak</span>
|
|||
|
|
<span class="linenos">27</span><span class="w"> </span><span class="nt">on-exception-message</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Looks like you are curious about my abilities. But I can only</span>
|
|||
|
|
<span class="linenos">28</span>
|
|||
|
|
<span class="hll"><span class="linenos">29</span><span class="nt">prompt_targets</span><span class="p">:</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">30</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">information_extraction</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">31</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">RAG</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">32</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">this prompt handles all information extractions scenarios</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">33</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/summary</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">34</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">35</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">reboot_network_device</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">36</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/agent/action</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">37</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">used to help network operators perform device operations like rebooting a device.</span>
|
|||
|
|
</span><span class="hll"><span class="linenos">38</span><span class="w"> </span><span class="nt">parameters</span><span class="p">:</span>
|
|||
|
|
</span><span class="linenos">39</span><span class="nt">error_target</span><span class="p">:</span><span class="w"> </span><span class="c1">#handle errors from Bolt or upstream LLMs</span>
|
|||
|
|
<span class="linenos">40</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="s">"error_handler"</span>
|
|||
|
|
<span class="linenos">41</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/errors</span>
|
|||
|
|
</pre></div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
<section id="intent-detection-and-prompt-matching">
|
|||
|
|
<h2>Intent Detection and Prompt Matching:<a class="headerlink" href="#intent-detection-and-prompt-matching" title="Link to this heading">#</a></h2>
|
|||
|
|
<p>Arch uses fast Natural Language Inference (NLI) and embedding approaches to first detect the intent of each
|
|||
|
|
incoming prompt. This intent detection phase analyzes the prompt’s content and matches it against predefined
|
|||
|
|
prompt targets, ensuring that each prompt is forwarded to the most appropriate endpoint. Arch’s intent
|
|||
|
|
detection framework considers both the name and description of each prompt target, and uses a composite matching
|
|||
|
|
score between an NLI and cosine similarity to enhance accuracy in forwarding decisions.</p>
|
|||
|
|
<ul class="simple">
|
|||
|
|
<li><p><strong>Embeddings</strong>: By embedding the prompt and comparing it to known target vectors, Arch effectively identifies
|
|||
|
|
the closest match, ensuring that the prompt is handled by the correct downstream service.</p></li>
|
|||
|
|
<li><p><strong>NLI</strong>: NLI techniques further refine the matching process by evaluating the semantic alignment between the
|
|||
|
|
prompt and potential targets.</p></li>
|
|||
|
|
</ul>
|
|||
|
|
</section>
|
|||
|
|
<section id="agentic-apps-via-prompt-targets">
|
|||
|
|
<h2>Agentic Apps via Prompt Targets<a class="headerlink" href="#agentic-apps-via-prompt-targets" title="Link to this heading">#</a></h2>
|
|||
|
|
<p>To support agentic apps via prompts, such as scheduling travel plans or sharing comments on a document, Arch uses
|
|||
|
|
its function calling abilities to extract critical information from the incoming prompt (or a set of prompts)
|
|||
|
|
needed by a downstream backend API or function call before calling it directly. For more details on how you can
|
|||
|
|
build agentic applications using Arch, see our full guide <a class="reference internal" href="../../../getting_started/use_cases/function_calling.html#arch-function-calling-agentic-guide"><span class="std std-ref">here</span></a>:</p>
|
|||
|
|
<div class="admonition note">
|
|||
|
|
<p class="admonition-title">Note</p>
|
|||
|
|
<p><a class="reference internal" href="../../../llms/llms.html#llms-in-arch"><span class="std std-ref">Arch-FC</span></a> is the dedicated agentic model engineered in Arch to extract information from
|
|||
|
|
a prompt (or a set of prompts) and execute the necessary backend API calls. This allows for efficient handling of agentic tasks,
|
|||
|
|
such as scheduling data retrieval, by dynamically interacting with backend services. Arch-FC is a flagship 1.3
|
|||
|
|
billion parameter model that matches performance with frontier models like Claude Sonnet 3.5 and GPT-4, while
|
|||
|
|
being 100x cheaper ($0.05 per million tokens, hosted) and 10x faster (p50 latencies of 200ms).</p>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
</section>
|
|||
|
|
<section id="prompting-llms">
|
|||
|
|
<h1>Prompting LLMs<a class="headerlink" href="#prompting-llms" title="Link to this heading">#</a></h1>
|
|||
|
|
<p>Arch is a single piece of software that is designed to manage both ingress and egress prompt traffic, drawing its
|
|||
|
|
distributed proxy nature from the robust <a class="reference external" href="https://envoyproxy.io">Envoy</a>. This makes it extremely efficient and capable
|
|||
|
|
of handling upstream connections to LLMs. If your application is originating calls to an API-based LLM, simply use
|
|||
|
|
Arch’s Python or JavaScript client SDK to send traffic to the desired LLM of choice. By sending traffic through Arch,
|
|||
|
|
you can propagate traces, manage and monitor traffic, apply rate limits, and utilize a large set of traffic management
|
|||
|
|
capabilities in a central place.</p>
|
|||
|
|
<div class="admonition attention">
|
|||
|
|
<p class="admonition-title">Attention</p>
|
|||
|
|
<p>When you start Arch, it automatically creates a listener port for egress calls to upstream LLMs. This is based on the
|
|||
|
|
<code class="docutils literal notranslate"><span class="pre">llm_providers</span></code> configuration section in the <code class="docutils literal notranslate"><span class="pre">prompt_config.yml</span></code> file. Arch binds itself to a local address such as
|
|||
|
|
127.0.0.1:9000/v1 or a DNS-based address like arch.local:9000/v1 for outgoing traffic.</p>
|
|||
|
|
</div>
|
|||
|
|
<section id="example-using-the-arch-python-sdk">
|
|||
|
|
<h2>Example: Using the Arch Python SDK<a class="headerlink" href="#example-using-the-arch-python-sdk" title="Link to this heading">#</a></h2>
|
|||
|
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">arch_client</span> <span class="kn">import</span> <span class="n">ArchClient</span>
|
|||
|
|
|
|||
|
|
<span class="c1"># Initialize the Arch client</span>
|
|||
|
|
<span class="n">client</span> <span class="o">=</span> <span class="n">ArchClient</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="s2">"http://127.0.0.1:9000/v1"</span><span class="p">)</span>
|
|||
|
|
|
|||
|
|
<span class="c1"># Define your LLM provider and prompt</span>
|
|||
|
|
<span class="n">llm_provider</span> <span class="o">=</span> <span class="s2">"openai"</span>
|
|||
|
|
<span class="n">prompt</span> <span class="o">=</span> <span class="s2">"What is the capital of France?"</span>
|
|||
|
|
|
|||
|
|
<span class="c1"># Send the prompt to the LLM through Arch</span>
|
|||
|
|
<span class="n">response</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">completions</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">llm_provider</span><span class="o">=</span><span class="n">llm_provider</span><span class="p">,</span> <span class="n">prompt</span><span class="o">=</span><span class="n">prompt</span><span class="p">)</span>
|
|||
|
|
|
|||
|
|
<span class="c1"># Print the response</span>
|
|||
|
|
<span class="nb">print</span><span class="p">(</span><span class="s2">"LLM Response:"</span><span class="p">,</span> <span class="n">response</span><span class="p">)</span>
|
|||
|
|
</pre></div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
<section id="example-using-openai-client-with-arch-as-an-egress-gateway">
|
|||
|
|
<h2>Example: Using OpenAI Client with Arch as an Egress Gateway<a class="headerlink" href="#example-using-openai-client-with-arch-as-an-egress-gateway" title="Link to this heading">#</a></h2>
|
|||
|
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">openai</span>
|
|||
|
|
|
|||
|
|
<span class="c1"># Set the OpenAI API base URL to the Arch gateway endpoint</span>
|
|||
|
|
<span class="n">openai</span><span class="o">.</span><span class="n">api_base</span> <span class="o">=</span> <span class="s2">"http://127.0.0.1:9000/v1"</span>
|
|||
|
|
|
|||
|
|
<span class="c1"># No need to set openai.api_key since it's configured in Arch's gateway</span>
|
|||
|
|
|
|||
|
|
<span class="c1"># Use the OpenAI client as usual</span>
|
|||
|
|
<span class="n">response</span> <span class="o">=</span> <span class="n">openai</span><span class="o">.</span><span class="n">Completion</span><span class="o">.</span><span class="n">create</span><span class="p">(</span>
|
|||
|
|
<span class="n">model</span><span class="o">=</span><span class="s2">"text-davinci-003"</span><span class="p">,</span>
|
|||
|
|
<span class="n">prompt</span><span class="o">=</span><span class="s2">"What is the capital of France?"</span>
|
|||
|
|
<span class="p">)</span>
|
|||
|
|
|
|||
|
|
<span class="nb">print</span><span class="p">(</span><span class="s2">"OpenAI Response:"</span><span class="p">,</span> <span class="n">response</span><span class="o">.</span><span class="n">choices</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
|
|||
|
|
</pre></div>
|
|||
|
|
</div>
|
|||
|
|
<p>In these examples:</p>
|
|||
|
|
<blockquote>
|
|||
|
|
<div><p>The ArchClient is used to send traffic directly through the Arch egress proxy to the LLM of your choice, such as OpenAI.
|
|||
|
|
The OpenAI client is configured to route traffic via Arch by setting its API base URL to http://127.0.0.1:9000/v1, assuming Arch is
|
|||
|
|
running locally and bound to that address and port.</p>
|
|||
|
|
</div></blockquote>
|
|||
|
|
<p>This setup allows you to take advantage of Arch’s advanced traffic management features while interacting with LLM APIs like OpenAI.</p>
|
|||
|
|
</section>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
|
|||
|
|
</article>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<footer class="prev-next-footer d-print-none">
|
|||
|
|
|
|||
|
|
<div class="prev-next-area">
|
|||
|
|
<a class="left-prev"
|
|||
|
|
href="../listeners/listeners.html"
|
|||
|
|
title="previous page">
|
|||
|
|
<i class="fa-solid fa-angle-left"></i>
|
|||
|
|
<div class="prev-next-info">
|
|||
|
|
<p class="prev-next-subtitle">previous</p>
|
|||
|
|
<p class="prev-next-title">Listener</p>
|
|||
|
|
</div>
|
|||
|
|
</a>
|
|||
|
|
<a class="right-next"
|
|||
|
|
href="../listeners/llm_provider.html"
|
|||
|
|
title="next page">
|
|||
|
|
<div class="prev-next-info">
|
|||
|
|
<p class="prev-next-subtitle">next</p>
|
|||
|
|
<p class="prev-next-title">LLM Provider</p>
|
|||
|
|
</div>
|
|||
|
|
<i class="fa-solid fa-angle-right"></i>
|
|||
|
|
</a>
|
|||
|
|
</div>
|
|||
|
|
</footer>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="sidebar-secondary-item">
|
|||
|
|
<div class="page-toc tocsection onthispage">
|
|||
|
|
<i class="fa-solid fa-list"></i> Contents
|
|||
|
|
</div>
|
|||
|
|
<nav class="bd-toc-nav page-toc">
|
|||
|
|
<ul class="visible nav section-nav flex-column">
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Prompts</a></li>
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#messages">Messages</a></li>
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompt-guardrails">Prompt Guardrails</a></li>
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompt-targets">Prompt Targets</a><ul class="visible nav section-nav flex-column">
|
|||
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#intent-detection-and-prompt-matching">Intent Detection and Prompt Matching:</a></li>
|
|||
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#agentic-apps-via-prompt-targets">Agentic Apps via Prompt Targets</a></li>
|
|||
|
|
</ul>
|
|||
|
|
</li>
|
|||
|
|
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prompting-llms">Prompting LLMs</a><ul class="visible nav section-nav flex-column">
|
|||
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-the-arch-python-sdk">Example: Using the Arch Python SDK</a></li>
|
|||
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#example-using-openai-client-with-arch-as-an-egress-gateway">Example: Using OpenAI Client with Arch as an Egress Gateway</a></li>
|
|||
|
|
</ul>
|
|||
|
|
</li>
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</nav></div>
|
|||
|
|
|
|||
|
|
</div></div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
<footer class="bd-footer-content">
|
|||
|
|
|
|||
|
|
<div class="bd-footer-content__inner container">
|
|||
|
|
|
|||
|
|
<div class="footer-item">
|
|||
|
|
|
|||
|
|
<p class="component-author">
|
|||
|
|
By Katanemo Labs, Inc
|
|||
|
|
</p>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<div class="footer-item">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<p class="copyright">
|
|||
|
|
|
|||
|
|
© Copyright 2024, Katanemo Labs, Inc.
|
|||
|
|
<br/>
|
|||
|
|
|
|||
|
|
</p>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<div class="footer-item">
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<div class="footer-item">
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
</footer>
|
|||
|
|
|
|||
|
|
|
|||
|
|
</main>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
|||
|
|
<script src="../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
|||
|
|
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
|||
|
|
|
|||
|
|
<footer class="bd-footer">
|
|||
|
|
</footer>
|
|||
|
|
</body>
|
|||
|
|
</html>
|