update reference config

2026-06-17 15:25:17 +02:00 · 2025-12-23 15:46:35 -08:00 · 2025-12-23 15:46:35 -08:00 · 5b5312a7c1
commit 5b5312a7c1
parent 0533987a2f
2 changed files with 120 additions and 146 deletions
--- a/docs/source/resources/includes/arch_config_full_reference.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference.yaml
@ -1,94 +1,75 @@
-version: v0.1
+# Arch Gateway configuration version
+version: v0.3.0

+# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
+agents:
+  - id: weather_agent
+    url: http://host.docker.internal:10510
+
+  - id: flight_agent
+    url: http://host.docker.internal:10520
+
+# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
+filters:
+  - id: input_guards
+    url: http://host.docker.internal:10500
+    # type: mcp (default)
+    # transport: streamable-http (default)
+    # tool: input_guards (default - same as filter id)
+
+# LLM provider configurations with API keys and model routing
+model_providers:
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+    default: true
+
+  - model: openai/gpt-4o-mini
+    access_key: $OPENAI_API_KEY
+
+  - model: anthropic/claude-sonnet-4-0
+    access_key: $ANTHROPIC_API_KEY
+
+  - model: mistral/ministral-3b-latest
+    access_key: $MISTRAL_API_KEY
+
+# Model aliases - use friendly names instead of full provider model names
+model_aliases:
+  fast-llm:
+    target: gpt-4o-mini
+
+  smart-llm:
+    target: gpt-4o
+
+# HTTP listeners - entry points for agent routing and direct LLM access
 listeners:
-  ingress_traffic:
+  # Agent listener for routing requests to multiple agents
+  - type: agent
+    name: travel_booking_service
+    port: 8001
+    router: plano_orchestrator_v1
    address: 0.0.0.0
-    port: 10000
-    message_format: openai
-    timeout: 5s
-  egress_traffic:
+    agents:
+      - id: rag_agent
+        description: virtual assistant for retrieval augmented generation tasks
+        filter_chain:
+          - input_guards
+
+  # Model listener for direct LLM access
+  - type: model
+    name: model_1
    address: 0.0.0.0
    port: 12000
-    message_format: openai
-    timeout: 5s

-# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
+# Reusable service endpoints
 endpoints:
  app_server:
-    # value could be ip address or a hostname with port
-    # this could also be a list of endpoints for load balancing
-    # for example endpoint: [ ip1:port, ip2:port ]
    endpoint: 127.0.0.1:80
-    # max time to wait for a connection to be established
    connect_timeout: 0.005s

  mistral_local:
    endpoint: 127.0.0.1:8001

-  error_target:
-    endpoint: error_target_1
-
-# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
-llm_providers:
-  - name: openai/gpt-4o
-    access_key: $OPENAI_API_KEY
-    model: openai/gpt-4o
-    default: true
-
-  - access_key: $MISTRAL_API_KEY
-    model: mistral/mistral-8x7b
-
-  - model: mistral/mistral-7b-instruct
-    base_url: http://mistral_local
-
-# Model aliases - friendly names that map to actual provider names
-model_aliases:
-  # Alias for summarization tasks -> fast/cheap model
-  arch.summarize.v1:
-    target: gpt-4o
-
-  # Alias for general purpose tasks -> latest model
-  arch.v1:
-    target: mistral-8x7b
-
-# provides a way to override default settings for the arch system
-overrides:
-  # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.
-  # The intent matching threshold is kept at 0.80, you can override this behavior if you would like
-  prompt_target_intent_matching_threshold: 0.60
-
-# default system prompt used by all prompt targets
-system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
-
-prompt_targets:
-  - name: information_extraction
-    default: true
-    description: handel all scenarios that are question and answer in nature. Like summarization, information extraction, etc.
-    endpoint:
-      name: app_server
-      path: /agent/summary
-      http_method: POST
-    # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
-    auto_llm_dispatch_on_response: true
-    # override system prompt for this prompt target
-    system_prompt: You are a helpful information extraction assistant. Use the information that is provided to you.
-
-  - name: reboot_network_device
-    description: Reboot a specific network device
-    endpoint:
-      name: app_server
-      path: /agent/action
-    parameters:
-      - name: device_id
-        type: str
-        description: Identifier of the network device to reboot.
-        required: true
-      - name: confirmation
-        type: bool
-        description: Confirmation flag to proceed with reboot.
-        default: false
-        enum: [true, false]
-
+# OpenTelemetry tracing configuration
 tracing:
-  # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.
-  sampling_rate: 0.1
+  # Random sampling percentage (1-100)
+  random_sampling: 100
--- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
@ -1,15 +1,46 @@
+agents:
+- id: weather_agent
+  url: http://host.docker.internal:10510
+- id: flight_agent
+  url: http://host.docker.internal:10520
 endpoints:
  app_server:
    connect_timeout: 0.005s
    endpoint: 127.0.0.1
    port: 80
-  error_target:
-    endpoint: error_target_1
-    port: 80
+  flight_agent:
+    endpoint: host.docker.internal
+    port: 10520
+    protocol: http
+  input_guards:
+    endpoint: host.docker.internal
+    port: 10500
+    protocol: http
  mistral_local:
    endpoint: 127.0.0.1
    port: 8001
+  weather_agent:
+    endpoint: host.docker.internal
+    port: 10510
+    protocol: http
+filters:
+- id: input_guards
+  url: http://host.docker.internal:10500
 listeners:
+- address: 0.0.0.0
+  agents:
+  - description: virtual assistant for retrieval augmented generation tasks
+    filter_chain:
+    - input_guards
+    id: rag_agent
+  name: travel_booking_service
+  port: 8001
+  router: plano_orchestrator_v1
+  type: agent
+- address: 0.0.0.0
+  name: model_1
+  port: 12000
+  type: model
 - address: 0.0.0.0
  model_providers:
  - access_key: $OPENAI_API_KEY
@ -17,49 +48,44 @@ listeners:
    model: gpt-4o
    name: openai/gpt-4o
    provider_interface: openai
+  - access_key: $OPENAI_API_KEY
+    model: gpt-4o-mini
+    name: openai/gpt-4o-mini
+    provider_interface: openai
+  - access_key: $ANTHROPIC_API_KEY
+    model: claude-sonnet-4-0
+    name: anthropic/claude-sonnet-4-0
+    provider_interface: anthropic
  - access_key: $MISTRAL_API_KEY
-    model: mistral-8x7b
-    name: mistral/mistral-8x7b
-    provider_interface: mistral
-  - base_url: http://mistral_local
-    cluster_name: mistral_mistral_local
-    endpoint: mistral_local
-    model: mistral-7b-instruct
-    name: mistral/mistral-7b-instruct
-    port: 80
-    protocol: http
+    model: ministral-3b-latest
+    name: mistral/ministral-3b-latest
    provider_interface: mistral
  name: egress_traffic
  port: 12000
-  timeout: 5s
+  timeout: 30s
  type: model_listener
- address: 0.0.0.0
-  name: ingress_traffic
-  port: 10000
-  timeout: 5s
-  type: prompt_listener
 model_aliases:
-  arch.summarize.v1:
+  fast-llm:
+    target: gpt-4o-mini
+  smart-llm:
    target: gpt-4o
-  arch.v1:
-    target: mistral-8x7b
 model_providers:
 - access_key: $OPENAI_API_KEY
  default: true
  model: gpt-4o
  name: openai/gpt-4o
  provider_interface: openai
+- access_key: $OPENAI_API_KEY
+  model: gpt-4o-mini
+  name: openai/gpt-4o-mini
+  provider_interface: openai
+- access_key: $ANTHROPIC_API_KEY
+  model: claude-sonnet-4-0
+  name: anthropic/claude-sonnet-4-0
+  provider_interface: anthropic
 - access_key: $MISTRAL_API_KEY
-  model: mistral-8x7b
-  name: mistral/mistral-8x7b
-  provider_interface: mistral
- base_url: http://mistral_local
-  cluster_name: mistral_mistral_local
-  endpoint: mistral_local
-  model: mistral-7b-instruct
-  name: mistral/mistral-7b-instruct
-  port: 80
-  protocol: http
+  model: ministral-3b-latest
+  name: mistral/ministral-3b-latest
  provider_interface: mistral
 - model: Arch-Function
  name: arch-function
@ -67,39 +93,6 @@ model_providers:
 - model: Plano-Orchestrator
  name: plano-orchestrator
  provider_interface: arch
-overrides:
-  prompt_target_intent_matching_threshold: 0.6
-prompt_targets:
- auto_llm_dispatch_on_response: true
-  default: true
-  description: handel all scenarios that are question and answer in nature. Like summarization,
-    information extraction, etc.
-  endpoint:
-    http_method: POST
-    name: app_server
-    path: /agent/summary
-  name: information_extraction
-  system_prompt: You are a helpful information extraction assistant. Use the information
-    that is provided to you.
- description: Reboot a specific network device
-  endpoint:
-    name: app_server
-    path: /agent/action
-  name: reboot_network_device
-  parameters:
-  - description: Identifier of the network device to reboot.
-    name: device_id
-    required: true
-    type: str
-  - default: false
-    description: Confirmation flag to proceed with reboot.
-    enum:
-    - true
-    - false
-    name: confirmation
-    type: bool
-system_prompt: You are a network assistant that just offers facts; not advice on manufacturers
-  or purchasing decisions.
 tracing:
-  sampling_rate: 0.1
-version: v0.1
+  random_sampling: 100
+version: v0.3.0