Update docs to Plano (#639)

2026-04-25 00:36:34 +02:00 · 2025-12-23 17:14:50 -08:00 · 2025-12-23 17:14:50 -08:00 · e224cba3e3
commit e224cba3e3
parent 15fbb6c3af
139 changed files with 4407 additions and 24735 deletions
--- a/docs/source/resources/includes/arch_config_full_reference.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference.yaml
@@ -1,100 +1,110 @@
-version: v0.1

+# Plano configuration version
+version: v0.3.0
+
+
+# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
+agents:
+  - id: weather_agent  # Example agent for weather
+    url: http://host.docker.internal:10510
+
+  - id: flight_agent   # Example agent for flights
+    url: http://host.docker.internal:10520
+
+
+# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
+filters:
+  - id: input_guards  # Example filter for input validation
+    url: http://host.docker.internal:10500
+    # type: mcp (default)
+    # transport: streamable-http (default)
+    # tool: input_guards (default - same as filter id)
+
+
+# LLM provider configurations with API keys and model routing
+model_providers:
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+    default: true
+
+  - model: openai/gpt-4o-mini
+    access_key: $OPENAI_API_KEY
+
+  - model: anthropic/claude-sonnet-4-0
+    access_key: $ANTHROPIC_API_KEY
+
+  - model: mistral/ministral-3b-latest
+    access_key: $MISTRAL_API_KEY
+
+
+# Model aliases - use friendly names instead of full provider model names
+model_aliases:
+  fast-llm:
+    target: gpt-4o-mini
+
+  smart-llm:
+    target: gpt-4o
+
+
+# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
 listeners:
-  ingress_traffic:
+  # Agent listener for routing requests to multiple agents
+  - type: agent
+    name: travel_booking_service
+    port: 8001
+    router: plano_orchestrator_v1
    address: 0.0.0.0
-    port: 10000
-    message_format: openai
-    timeout: 5s
-  egress_traffic:
+    agents:
+      - id: weather_agent  # declared under top-level agents
+        description: provides current weather conditions and forecasts for a location
+        filter_chain:
+          - input_guards  # filter id declared under top-level filters
+
+  # Model listener for direct LLM access
+  - type: model
+    name: model_1
    address: 0.0.0.0
    port: 12000
-    message_format: openai
-    timeout: 5s

-# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
+  # Prompt listener for function calling (for prompt_targets)
+  - type: prompt
+    name: prompt_function_listener
+    address: 0.0.0.0
+    port: 10000
+    # This listener is used for prompt_targets and function calling
+
+
+# Reusable service endpoints
 endpoints:
  app_server:
-    # value could be ip address or a hostname with port
-    # this could also be a list of endpoints for load balancing
-    # for example endpoint: [ ip1:port, ip2:port ]
    endpoint: 127.0.0.1:80
-    # max time to wait for a connection to be established
    connect_timeout: 0.005s

  mistral_local:
    endpoint: 127.0.0.1:8001  # NOTE(review): port 8001 is also the agent listener's port; mistral_local looks unreferenced after this change — confirm

-  error_target:
-    endpoint: error_target_1
-
-# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
-llm_providers:
-  - name: openai/gpt-4o
-    access_key: $OPENAI_API_KEY
-    model: openai/gpt-4o
-    default: true
-
-  - access_key: $MISTRAL_API_KEY
-    model: mistral/mistral-8x7b
-
-  - model: mistral/mistral-7b-instruct
-    base_url: http://mistral_local
-
-# Model aliases - friendly names that map to actual provider names
-model_aliases:
-  # Alias for summarization tasks -> fast/cheap model
-  arch.summarize.v1:
-    target: gpt-4o
-
-  # Alias for general purpose tasks -> latest model
-  arch.v1:
-    target: mistral-8x7b
-
-# provides a way to override default settings for the arch system
-overrides:
-  # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.
-  # The intent matching threshold is kept at 0.80, you can override this behavior if you would like
-  prompt_target_intent_matching_threshold: 0.60
-
-# default system prompt used by all prompt targets
-system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
-
-prompt_guards:
-  input_guards:
-    jailbreak:
-      on_exception:
-        message: Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.

+# Prompt targets for function calling and API orchestration
 prompt_targets:
-  - name: information_extraction
-    default: true
-    description: handel all scenarios that are question and answer in nature. Like summarization, information extraction, etc.
-    endpoint:
-      name: app_server
-      path: /agent/summary
-      http_method: POST
-    # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
-    auto_llm_dispatch_on_response: true
-    # override system prompt for this prompt target
-    system_prompt: You are a helpful information extraction assistant. Use the information that is provided to you.
-
-  - name: reboot_network_device
-    description: Reboot a specific network device
-    endpoint:
-      name: app_server
-      path: /agent/action
+  - name: get_current_weather
+    description: Get current weather at a location.
    parameters:
-      - name: device_id
-        type: str
-        description: Identifier of the network device to reboot.
+      - name: location
+        description: The location to get the weather for
        required: true
-      - name: confirmation
-        type: bool
-        description: Confirmation flag to proceed with reboot.
-        default: false
-        enum: [true, false]
+        type: string
+        format: City, State
+      - name: days
+        description: the number of days for the request
+        required: true
+        type: int
+    endpoint:
+      name: app_server
+      path: /weather
+      http_method: POST

+
+# OpenTelemetry tracing configuration
 tracing:
-  # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.
-  sampling_rate: 0.1
+  # Random sampling percentage (1-100)
+  random_sampling: 100