expand configuration reference with missing fields

This commit is contained in:
Spherrrical 2026-03-30 12:16:39 -07:00
parent cdad02c5ee
commit 8e4ef9a443

View file

@@ -32,13 +32,45 @@ model_providers:
- model: mistral/ministral-3b-latest
access_key: $MISTRAL_API_KEY
# Example: Passthrough authentication for LiteLLM or similar proxies
# When passthrough_auth is true, client's Authorization header is forwarded
# instead of using the configured access_key
# routing_preferences: tags a model with named capabilities so Plano's LLM router
# can select the best model for each request based on intent. Requires the
# Arch-Router model (or equivalent) to be configured in overrides.llm_routing_model.
# Each preference has a name (short label) and a description (used for intent matching).
- model: openai/gpt-4o
name: gpt-4o-coding # Optional friendly name to distinguish multiple entries for same model
access_key: $OPENAI_API_KEY
routing_preferences:
- name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
- name: code review
description: reviewing, analyzing, and suggesting improvements to existing code
- model: anthropic/claude-sonnet-4-0
name: claude-sonnet-reasoning
access_key: $ANTHROPIC_API_KEY
routing_preferences:
- name: reasoning
description: complex multi-step reasoning, math, logic puzzles, and analytical tasks
# passthrough_auth: forwards the client's Authorization header upstream instead of
# using the configured access_key. Useful for LiteLLM or similar proxy setups.
- model: openai/gpt-4o-litellm
base_url: https://litellm.example.com
passthrough_auth: true
# provider_interface: specifies the API format when the provider doesn't match
# the default inferred from the model name. Supported: openai, claude, gemini,
# mistral, groq, deepseek, plano
- model: groq/llama-3.3-70b-versatile
access_key: $GROQ_API_KEY
provider_interface: groq
# Custom/self-hosted endpoint with explicit http_host override
- model: openai/llama-3.3-70b
base_url: https://api.custom-provider.com
http_host: api.custom-provider.com
access_key: $CUSTOM_API_KEY
# Model aliases - use friendly names instead of full provider model names
model_aliases:
fast-llm:
@@ -66,26 +98,38 @@ listeners:
name: model_1
address: 0.0.0.0
port: 12000
# Optional: attach input filters for guardrails on direct LLM requests
# input_filters:
# - input_guards
timeout: 30s # Request timeout (e.g. "30s", "60s")
max_retries: 3 # Number of retries on upstream failure
input_filters: # Filters applied before forwarding to LLM
- input_guards
output_filters: # Filters applied to LLM responses before returning to client
- input_guards
# Prompt listener for function calling (for prompt_targets)
- type: prompt
name: prompt_function_listener
address: 0.0.0.0
port: 10000
# This listener is used for prompt_targets and function calling
# Reusable service endpoints
endpoints:
app_server:
endpoint: 127.0.0.1:80
connect_timeout: 0.005s
protocol: http # http or https
mistral_local:
endpoint: 127.0.0.1:8001
secure_service:
endpoint: api.example.com:443
protocol: https
http_host: api.example.com # Override the Host header sent upstream
# Optional top-level system prompt applied to all prompt_targets
system_prompt: |
You are a helpful assistant. Always respond concisely and accurately.
# Prompt targets for function calling and API orchestration
prompt_targets:
- name: get_current_weather
@@ -104,8 +148,75 @@ prompt_targets:
name: app_server
path: /weather
http_method: POST
# Per-target system prompt (overrides top-level system_prompt for this target)
system_prompt: You are a weather expert. Provide accurate and concise weather information.
# auto_llm_dispatch_on_response: when true, the LLM is called again with the
# function response to produce a final natural-language answer for the user
auto_llm_dispatch_on_response: true
# Rate limits - control token usage per model and request selector
ratelimits:
- model: openai/gpt-4o
selector:
key: x-user-id # HTTP header key used to identify the rate-limit subject
value: "*" # Wildcard matches any value; use a specific string to target one
limit:
tokens: 100000 # Maximum tokens allowed in the given time unit
unit: hour # Time unit: "minute", "hour", or "day"
- model: openai/gpt-4o-mini
selector:
key: x-org-id
value: acme-corp
limit:
tokens: 500000
unit: day
# Global behavior overrides
overrides:
# Threshold for routing a request to a prompt_target (0.0–1.0). Lower = more permissive.
prompt_target_intent_matching_threshold: 0.7
# Trim conversation history to fit within the model's context window
optimize_context_window: true
# Use Plano's agent orchestrator for multi-agent request routing
use_agent_orchestrator: true
# Connect timeout for upstream provider clusters (e.g., "5s", "10s"). Default: "5s"
upstream_connect_timeout: 10s
# Path to the trusted CA bundle for upstream TLS verification
upstream_tls_ca_path: /etc/ssl/certs/ca-certificates.crt
# Model used for intent-based LLM routing (must be listed in model_providers)
llm_routing_model: Arch-Router
# Model used for agent orchestration (must be listed in model_providers)
agent_orchestration_model: Plano-Orchestrator
# State storage for multi-turn conversation history
state_storage:
type: memory # "memory" (in-process) or "postgres" (persistent)
# connection_string is required when type is postgres.
# Supports environment variable substitution: $VAR or ${VAR}
# connection_string: postgresql://user:$DB_PASS@localhost:5432/plano
# Input guardrails applied globally to all incoming requests
prompt_guards:
input_guards:
jailbreak:
on_exception:
message: "I'm sorry, I can't help with that request."
# OpenTelemetry tracing configuration
tracing:
# Random sampling percentage (1-100)
random_sampling: 100
# Include internal Plano spans in traces
trace_arch_internal: false
# gRPC endpoint for OpenTelemetry collector (e.g., Jaeger, Tempo)
opentracing_grpc_endpoint: http://localhost:4317
span_attributes:
# Propagate request headers whose names start with these prefixes as span attributes
header_prefixes:
- x-user-
- x-org-
# Static key/value pairs added to every span
static:
environment: production
service.team: platform