This commit is contained in:
salmanap 2026-01-14 23:07:03 +00:00
parent be238fcaef
commit 670388c0bb
6 changed files with 233 additions and 106 deletions

View file

@@ -167,87 +167,87 @@ where prompts get routed to, apply guardrails, and enable critical agent observa
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><code><span id="line-1"><span class="linenos"> 1</span><span class="c1"># Arch Gateway configuration version</span>
</span><span id="line-2"><span class="linenos"> 2</span><span class="nt">version</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">v0.3.0</span>
</span><span id="line-3"><span class="linenos"> 3</span>
</span><span id="line-4"><span class="linenos"> 4</span>
</span><span id="line-5"><span class="linenos"> 5</span><span class="c1"># External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="nt">agents</span><span class="p">:</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">weather_agent</span><span class="w"> </span><span class="c1"># Example agent for weather</span>
</span><span id="line-8"><span class="linenos"> 8</span><span class="w"> </span><span class="nt">url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:10510</span>
</span><span id="line-9"><span class="linenos"> 9</span>
</span><span id="line-10"><span class="linenos"> 10</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">flight_agent</span><span class="w"> </span><span class="c1"># Example agent for flights</span>
</span><span id="line-11"><span class="linenos"> 11</span><span class="w"> </span><span class="nt">url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:10520</span>
</span><span id="line-12"><span class="linenos"> 12</span>
</span><span id="line-13"><span class="linenos"> 13</span>
</span><span id="line-14"><span class="linenos"> 14</span><span class="c1"># MCP filters applied to requests/responses (e.g., input validation, query rewriting)</span>
</span><span id="line-15"><span class="linenos"> 15</span><span class="nt">filters</span><span class="p">:</span>
</span><span id="line-16"><span class="linenos"> 16</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">input_guards</span><span class="w"> </span><span class="c1"># Example filter for input validation</span>
</span><span id="line-17"><span class="linenos"> 17</span><span class="w"> </span><span class="nt">url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:10500</span>
</span><span id="line-18"><span class="linenos"> 18</span><span class="w"> </span><span class="c1"># type: mcp (default)</span>
</span><span id="line-19"><span class="linenos"> 19</span><span class="w"> </span><span class="c1"># transport: streamable-http (default)</span>
</span><span id="line-20"><span class="linenos"> 20</span><span class="w"> </span><span class="c1"># tool: input_guards (default - same as filter id)</span>
</span><span id="line-21"><span class="linenos"> 21</span>
</span><span id="line-22"><span class="linenos"> 22</span>
</span><span id="line-23"><span class="linenos"> 23</span><span class="c1"># LLM provider configurations with API keys and model routing</span>
</span><span id="line-24"><span class="linenos"> 24</span><span class="nt">model_providers</span><span class="p">:</span>
</span><span id="line-25"><span class="linenos"> 25</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
</span><span id="line-26"><span class="linenos"> 26</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-27"><span class="linenos"> 27</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-4"><span class="linenos"> 4</span><span class="c1"># External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)</span>
</span><span id="line-5"><span class="linenos"> 5</span><span class="nt">agents</span><span class="p">:</span>
</span><span id="line-6"><span class="linenos"> 6</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">weather_agent</span><span class="w"> </span><span class="c1"># Example agent for weather</span>
</span><span id="line-7"><span class="linenos"> 7</span><span class="w"> </span><span class="nt">url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:10510</span>
</span><span id="line-8"><span class="linenos"> 8</span>
</span><span id="line-9"><span class="linenos"> 9</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">flight_agent</span><span class="w"> </span><span class="c1"># Example agent for flights</span>
</span><span id="line-10"><span class="linenos"> 10</span><span class="w"> </span><span class="nt">url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:10520</span>
</span><span id="line-11"><span class="linenos"> 11</span>
</span><span id="line-12"><span class="linenos"> 12</span><span class="c1"># MCP filters applied to requests/responses (e.g., input validation, query rewriting)</span>
</span><span id="line-13"><span class="linenos"> 13</span><span class="nt">filters</span><span class="p">:</span>
</span><span id="line-14"><span class="linenos"> 14</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">input_guards</span><span class="w"> </span><span class="c1"># Example filter for input validation</span>
</span><span id="line-15"><span class="linenos"> 15</span><span class="w"> </span><span class="nt">url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://host.docker.internal:10500</span>
</span><span id="line-16"><span class="linenos"> 16</span><span class="w"> </span><span class="c1"># type: mcp (default)</span>
</span><span id="line-17"><span class="linenos"> 17</span><span class="w"> </span><span class="c1"># transport: streamable-http (default)</span>
</span><span id="line-18"><span class="linenos"> 18</span><span class="w"> </span><span class="c1"># tool: input_guards (default - same as filter id)</span>
</span><span id="line-19"><span class="linenos"> 19</span>
</span><span id="line-20"><span class="linenos"> 20</span><span class="c1"># LLM provider configurations with API keys and model routing</span>
</span><span id="line-21"><span class="linenos"> 21</span><span class="nt">model_providers</span><span class="p">:</span>
</span><span id="line-22"><span class="linenos"> 22</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o</span>
</span><span id="line-23"><span class="linenos"> 23</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-24"><span class="linenos"> 24</span><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-25"><span class="linenos"> 25</span>
</span><span id="line-26"><span class="linenos"> 26</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
</span><span id="line-27"><span class="linenos"> 27</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-28"><span class="linenos"> 28</span>
</span><span id="line-29"><span class="linenos"> 29</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-mini</span>
</span><span id="line-30"><span class="linenos"> 30</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$OPENAI_API_KEY</span>
</span><span id="line-29"><span class="linenos"> 29</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-sonnet-4-0</span>
</span><span id="line-30"><span class="linenos"> 30</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
</span><span id="line-31"><span class="linenos"> 31</span>
</span><span id="line-32"><span class="linenos"> 32</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">anthropic/claude-sonnet-4-0</span>
</span><span id="line-33"><span class="linenos"> 33</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$ANTHROPIC_API_KEY</span>
</span><span id="line-32"><span class="linenos"> 32</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral/ministral-3b-latest</span>
</span><span id="line-33"><span class="linenos"> 33</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_API_KEY</span>
</span><span id="line-34"><span class="linenos"> 34</span>
</span><span id="line-35"><span class="linenos"> 35</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mistral/ministral-3b-latest</span>
</span><span id="line-36"><span class="linenos"> 36</span><span class="w"> </span><span class="nt">access_key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">$MISTRAL_API_KEY</span>
</span><span id="line-37"><span class="linenos"> 37</span>
</span><span id="line-38"><span class="linenos"> 38</span>
</span><span id="line-39"><span class="linenos"> 39</span><span class="c1"># Model aliases - use friendly names instead of full provider model names</span>
</span><span id="line-40"><span class="linenos"> 40</span><span class="nt">model_aliases</span><span class="p">:</span>
</span><span id="line-41"><span class="linenos"> 41</span><span class="w"> </span><span class="nt">fast-llm</span><span class="p">:</span>
</span><span id="line-42"><span class="linenos"> 42</span><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
</span><span id="line-43"><span class="linenos"> 43</span>
</span><span id="line-44"><span class="linenos"> 44</span><span class="w"> </span><span class="nt">smart-llm</span><span class="p">:</span>
</span><span id="line-45"><span class="linenos"> 45</span><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-35"><span class="linenos"> 35</span><span class="w"> </span><span class="c1"># Example: Passthrough authentication for LiteLLM or similar proxies</span>
</span><span id="line-36"><span class="linenos"> 36</span><span class="w"> </span><span class="c1"># When passthrough_auth is true, client's Authorization header is forwarded</span>
</span><span id="line-37"><span class="linenos"> 37</span><span class="w"> </span><span class="c1"># instead of using the configured access_key</span>
</span><span id="line-38"><span class="linenos"> 38</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">model</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">openai/gpt-4o-litellm</span>
</span><span id="line-39"><span class="linenos"> 39</span><span class="w"> </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">https://litellm.example.com</span>
</span><span id="line-40"><span class="linenos"> 40</span><span class="w"> </span><span class="nt">passthrough_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</span><span id="line-41"><span class="linenos"> 41</span>
</span><span id="line-42"><span class="linenos"> 42</span><span class="c1"># Model aliases - use friendly names instead of full provider model names</span>
</span><span id="line-43"><span class="linenos"> 43</span><span class="nt">model_aliases</span><span class="p">:</span>
</span><span id="line-44"><span class="linenos"> 44</span><span class="w"> </span><span class="nt">fast-llm</span><span class="p">:</span>
</span><span id="line-45"><span class="linenos"> 45</span><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o-mini</span>
</span><span id="line-46"><span class="linenos"> 46</span>
</span><span id="line-47"><span class="linenos"> 47</span>
</span><span id="line-48"><span class="linenos"> 48</span><span class="c1"># HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access</span>
</span><span id="line-49"><span class="linenos"> 49</span><span class="nt">listeners</span><span class="p">:</span>
</span><span id="line-50"><span class="linenos"> 50</span><span class="w"> </span><span class="c1"># Agent listener for routing requests to multiple agents</span>
</span><span id="line-51"><span class="linenos"> 51</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">agent</span>
</span><span id="line-52"><span class="linenos"> 52</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">travel_booking_service</span>
</span><span id="line-53"><span class="linenos"> 53</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8001</span>
</span><span id="line-54"><span class="linenos"> 54</span><span class="w"> </span><span class="nt">router</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">plano_orchestrator_v1</span>
</span><span id="line-55"><span class="linenos"> 55</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
</span><span id="line-56"><span class="linenos"> 56</span><span class="w"> </span><span class="nt">agents</span><span class="p">:</span>
</span><span id="line-57"><span class="linenos"> 57</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">rag_agent</span>
</span><span id="line-58"><span class="linenos"> 58</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">virtual assistant for retrieval augmented generation tasks</span>
</span><span id="line-59"><span class="linenos"> 59</span><span class="w"> </span><span class="nt">filter_chain</span><span class="p">:</span>
</span><span id="line-60"><span class="linenos"> 60</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">input_guards</span>
</span><span id="line-61"><span class="linenos"> 61</span>
</span><span id="line-62"><span class="linenos"> 62</span><span class="w"> </span><span class="c1"># Model listener for direct LLM access</span>
</span><span id="line-63"><span class="linenos"> 63</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">model</span>
</span><span id="line-64"><span class="linenos"> 64</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">model_1</span>
</span><span id="line-65"><span class="linenos"> 65</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
</span><span id="line-66"><span class="linenos"> 66</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">12000</span>
</span><span id="line-67"><span class="linenos"> 67</span>
</span><span id="line-68"><span class="linenos"> 68</span><span class="w"> </span><span class="c1"># Prompt listener for function calling (for prompt_targets)</span>
</span><span id="line-69"><span class="linenos"> 69</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">prompt</span>
</span><span id="line-70"><span class="linenos"> 70</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">prompt_function_listener</span>
</span><span id="line-71"><span class="linenos"> 71</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
</span><span id="line-72"><span class="linenos"> 72</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
</span><span id="line-73"><span class="linenos"> 73</span><span class="w"> </span><span class="c1"># This listener is used for prompt_targets and function calling</span>
</span><span id="line-74"><span class="linenos"> 74</span>
</span><span id="line-75"><span class="linenos"> 75</span>
</span><span id="line-76"><span class="linenos"> 76</span><span class="c1"># Reusable service endpoints</span>
</span><span id="line-77"><span class="linenos"> 77</span><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-78"><span class="linenos"> 78</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-79"><span class="linenos"> 79</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
</span><span id="line-80"><span class="linenos"> 80</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span><span id="line-81"><span class="linenos"> 81</span>
</span><span id="line-82"><span class="linenos"> 82</span><span class="w"> </span><span class="nt">mistral_local</span><span class="p">:</span>
</span><span id="line-83"><span class="linenos"> 83</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:8001</span>
</span><span id="line-84"><span class="linenos"> 84</span>
</span><span id="line-47"><span class="linenos"> 47</span><span class="w"> </span><span class="nt">smart-llm</span><span class="p">:</span>
</span><span id="line-48"><span class="linenos"> 48</span><span class="w"> </span><span class="nt">target</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">gpt-4o</span>
</span><span id="line-49"><span class="linenos"> 49</span>
</span><span id="line-50"><span class="linenos"> 50</span><span class="c1"># HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access</span>
</span><span id="line-51"><span class="linenos"> 51</span><span class="nt">listeners</span><span class="p">:</span>
</span><span id="line-52"><span class="linenos"> 52</span><span class="w"> </span><span class="c1"># Agent listener for routing requests to multiple agents</span>
</span><span id="line-53"><span class="linenos"> 53</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">agent</span>
</span><span id="line-54"><span class="linenos"> 54</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">travel_booking_service</span>
</span><span id="line-55"><span class="linenos"> 55</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8001</span>
</span><span id="line-56"><span class="linenos"> 56</span><span class="w"> </span><span class="nt">router</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">plano_orchestrator_v1</span>
</span><span id="line-57"><span class="linenos"> 57</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
</span><span id="line-58"><span class="linenos"> 58</span><span class="w"> </span><span class="nt">agents</span><span class="p">:</span>
</span><span id="line-59"><span class="linenos"> 59</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">rag_agent</span>
</span><span id="line-60"><span class="linenos"> 60</span><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">virtual assistant for retrieval augmented generation tasks</span>
</span><span id="line-61"><span class="linenos"> 61</span><span class="w"> </span><span class="nt">filter_chain</span><span class="p">:</span>
</span><span id="line-62"><span class="linenos"> 62</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">input_guards</span>
</span><span id="line-63"><span class="linenos"> 63</span>
</span><span id="line-64"><span class="linenos"> 64</span><span class="w"> </span><span class="c1"># Model listener for direct LLM access</span>
</span><span id="line-65"><span class="linenos"> 65</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">model</span>
</span><span id="line-66"><span class="linenos"> 66</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">model_1</span>
</span><span id="line-67"><span class="linenos"> 67</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
</span><span id="line-68"><span class="linenos"> 68</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">12000</span>
</span><span id="line-69"><span class="linenos"> 69</span>
</span><span id="line-70"><span class="linenos"> 70</span><span class="w"> </span><span class="c1"># Prompt listener for function calling (for prompt_targets)</span>
</span><span id="line-71"><span class="linenos"> 71</span><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">prompt</span>
</span><span id="line-72"><span class="linenos"> 72</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">prompt_function_listener</span>
</span><span id="line-73"><span class="linenos"> 73</span><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.0.0.0</span>
</span><span id="line-74"><span class="linenos"> 74</span><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">10000</span>
</span><span id="line-75"><span class="linenos"> 75</span><span class="w"> </span><span class="c1"># This listener is used for prompt_targets and function calling</span>
</span><span id="line-76"><span class="linenos"> 76</span>
</span><span id="line-77"><span class="linenos"> 77</span><span class="c1"># Reusable service endpoints</span>
</span><span id="line-78"><span class="linenos"> 78</span><span class="nt">endpoints</span><span class="p">:</span>
</span><span id="line-79"><span class="linenos"> 79</span><span class="w"> </span><span class="nt">app_server</span><span class="p">:</span>
</span><span id="line-80"><span class="linenos"> 80</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:80</span>
</span><span id="line-81"><span class="linenos"> 81</span><span class="w"> </span><span class="nt">connect_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">0.005s</span>
</span><span id="line-82"><span class="linenos"> 82</span>
</span><span id="line-83"><span class="linenos"> 83</span><span class="w"> </span><span class="nt">mistral_local</span><span class="p">:</span>
</span><span id="line-84"><span class="linenos"> 84</span><span class="w"> </span><span class="nt">endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">127.0.0.1:8001</span>
</span><span id="line-85"><span class="linenos"> 85</span>
</span><span id="line-86"><span class="linenos"> 86</span><span class="c1"># Prompt targets for function calling and API orchestration</span>
</span><span id="line-87"><span class="linenos"> 87</span><span class="nt">prompt_targets</span><span class="p">:</span>
@@ -268,11 +268,10 @@ where prompts get routed to, apply guardrails, and enable critical agent observa
</span><span id="line-102"><span class="linenos">102</span><span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/weather</span>
</span><span id="line-103"><span class="linenos">103</span><span class="w"> </span><span class="nt">http_method</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">POST</span>
</span><span id="line-104"><span class="linenos">104</span>
</span><span id="line-105"><span class="linenos">105</span>
</span><span id="line-106"><span class="linenos">106</span><span class="c1"># OpenTelemetry tracing configuration</span>
</span><span id="line-107"><span class="linenos">107</span><span class="nt">tracing</span><span class="p">:</span>
</span><span id="line-108"><span class="linenos">108</span><span class="w"> </span><span class="c1"># Random sampling percentage (1-100)</span>
</span><span id="line-109"><span class="linenos">109</span><span class="w"> </span><span class="nt">random_sampling</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100</span>
</span><span id="line-105"><span class="linenos">105</span><span class="c1"># OpenTelemetry tracing configuration</span>
</span><span id="line-106"><span class="linenos">106</span><span class="nt">tracing</span><span class="p">:</span>
</span><span id="line-107"><span class="linenos">107</span><span class="w"> </span><span class="c1"># Random sampling percentage (1-100)</span>
</span><span id="line-108"><span class="linenos">108</span><span class="w"> </span><span class="nt">random_sampling</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">100</span>
</span></code></pre></div>
</div>
</div>