Add preliminary support for LLM agents (#432)

2026-06-17 15:25:17 +02:00 · 2025-03-19 15:21:34 -07:00 · 2025-03-19 15:21:34 -07:00 · 84cd1df7bf
commit 84cd1df7bf
parent 8d66fefded
29 changed files with 1388 additions and 121 deletions
--- a/arch/Dockerfile
+++ b/arch/Dockerfile
@ -28,4 +28,5 @@ COPY arch/arch_config_schema.yaml .
 RUN pip install requests
 RUN touch /var/log/envoy.log

+# ENTRYPOINT ["sh","-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --log-level trace 2>&1 | tee /var/log/envoy.log"]
 ENTRYPOINT ["sh","-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"]
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@ -93,7 +93,6 @@ properties:
      additionalProperties: false
      required:
        - name
-        - model
  overrides:
    type: object
    properties:
@ -101,6 +100,8 @@ properties:
        type: number
      optimize_context_window:
        type: boolean
+      use_agent_orchestrator:
+        type: boolean
  system_prompt:
    type: string
  prompt_targets:
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -142,6 +142,19 @@ static_resources:
                            cluster: {{ llm_cluster_name }}
                            timeout: 60s
                      {% endfor %}
+
+                      {% if agent_orchestrator %}
+                        - match:
+                            prefix: "/"
+                            headers:
+                              - name: "x-arch-llm-provider"
+                                string_match:
+                                  exact: {{ agent_orchestrator }}
+                          route:
+                            auto_host_rewrite: true
+                            cluster: {{ agent_orchestrator }}
+                            timeout: 60s
+                      {% endif %}
                http_filters:
                  - name: envoy.filters.http.compressor
                    typed_config:
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@ -48,7 +48,7 @@ def validate_and_render_schema():
        arch_config_schema = file.read()

    config_yaml = yaml.safe_load(arch_config)
-    config_schema_yaml = yaml.safe_load(arch_config_schema)
+    _ = yaml.safe_load(arch_config_schema)
    inferred_clusters = {}

    endpoints = config_yaml.get("endpoints", {})
@ -150,6 +150,26 @@ def validate_and_render_schema():
    if llm_gateway_listener.get("timeout") == None:
        llm_gateway_listener["timeout"] = "10s"

+    use_agent_orchestrator = config_yaml.get("overrides", {}).get(
+        "use_agent_orchestrator", False
+    )
+
+    agent_orchestrator = None
+    if use_agent_orchestrator:
+        print("Using agent orchestrator")
+
+        if len(endpoints) == 0:
+            raise Exception(
+                "Please provide agent orchestrator in the endpoints section in your arch_config.yaml file"
+            )
+        elif len(endpoints) > 1:
+            raise Exception(
+                "Please provide single agent orchestrator in the endpoints section in your arch_config.yaml file"
+            )
+        else:
+            agent_orchestrator = list(endpoints.keys())[0]
+
+    print("agent_orchestrator: ", agent_orchestrator)
    data = {
        "prompt_gateway_listener": prompt_gateway_listener,
        "llm_gateway_listener": llm_gateway_listener,
@ -159,6 +179,7 @@ def validate_and_render_schema():
        "arch_llm_providers": config_yaml["llm_providers"],
        "arch_tracing": arch_tracing,
        "local_llms": llms_with_endpoint,
+        "agent_orchestrator": agent_orchestrator,
    }

    rendered = template.render(data)