Configuration Reference

The following is a complete reference of the plano_config.yml that controls the behavior of a single instance of the Plano gateway. This is where you enable capabilities like routing to upstream LLM providers, defining the prompt_targets that prompts get routed to, applying guardrails, and enabling critical agent observability features.

  1# Plano Gateway configuration version
  2version: v0.3.0
  3
  4# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
  5agents:
  6  - id: weather_agent # Example agent for weather
  7    url: http://localhost:10510
  8
  9  - id: flight_agent # Example agent for flights
 10    url: http://localhost:10520
 11
 12# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
 13filters:
 14  - id: input_guards # Example filter for input validation
 15    url: http://localhost:10500
 16    # type: mcp (default)
 17    # transport: streamable-http (default)
 18    # tool: input_guards (default - same as filter id)
 19
 20# LLM provider configurations with API keys and model routing
 21model_providers:
 22  - model: openai/gpt-4o
 23    access_key: $OPENAI_API_KEY
 24    default: true
 25
 26  - model: openai/gpt-4o-mini
 27    access_key: $OPENAI_API_KEY
 28
 29  - model: anthropic/claude-sonnet-4-0
 30    access_key: $ANTHROPIC_API_KEY
 31
 32  - model: mistral/ministral-3b-latest
 33    access_key: $MISTRAL_API_KEY
 34
 35  # routing_preferences: tags a model with named capabilities so Plano's LLM router
 36  # can select the best model for each request based on intent. Requires the
 37  # Arch-Router model (or equivalent) to be configured in overrides.llm_routing_model.
 38  # Each preference has a name (short label) and a description (used for intent matching).
 39  - model: openai/gpt-4o
 40    name: gpt-4o-coding       # Optional friendly name to distinguish multiple entries for same model
 41    access_key: $OPENAI_API_KEY
 42    routing_preferences:
 43      - name: code generation
 44        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
 45      - name: code review
 46        description: reviewing, analyzing, and suggesting improvements to existing code
 47
 48  - model: anthropic/claude-sonnet-4-0
 49    name: claude-sonnet-reasoning
 50    access_key: $ANTHROPIC_API_KEY
 51    routing_preferences:
 52      - name: reasoning
 53        description: complex multi-step reasoning, math, logic puzzles, and analytical tasks
 54
 55  # passthrough_auth: forwards the client's Authorization header upstream instead of
 56  # using the configured access_key. Useful for LiteLLM or similar proxy setups.
 57  - model: openai/gpt-4o-litellm
 58    base_url: https://litellm.example.com
 59    passthrough_auth: true
 60
 61  # provider_interface: specifies the API format when the provider doesn't match
 62  # the default inferred from the model name. Supported: openai, claude, gemini,
 63  # mistral, groq, deepseek, plano
 64  - model: groq/llama-3.3-70b-versatile
 65    access_key: $GROQ_API_KEY
 66    provider_interface: groq
 67
 68  # Custom/self-hosted endpoint with explicit http_host override
 69  - model: openai/llama-3.3-70b
 70    base_url: https://api.custom-provider.com
 71    http_host: api.custom-provider.com
 72    access_key: $CUSTOM_API_KEY
 73
 74# Model aliases - use friendly names instead of full provider model names
 75model_aliases:
 76  fast-llm:
 77    target: gpt-4o-mini
 78
 79  smart-llm:
 80    target: gpt-4o
 81
 82# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
 83listeners:
 84  # Agent listener for routing requests to multiple agents
 85  - type: agent
 86    name: travel_booking_service
 87    port: 8001
 88    router: plano_orchestrator_v1
 89    address: 0.0.0.0
 90    agents:
 91      - id: rag_agent
 92        description: virtual assistant for retrieval augmented generation tasks
 93        input_filters:
 94          - input_guards
 95
 96  # Model listener for direct LLM access
 97  - type: model
 98    name: model_1
 99    address: 0.0.0.0
100    port: 12000
101    timeout: 30s          # Request timeout (e.g. "30s", "60s")
102    max_retries: 3        # Number of retries on upstream failure
103    input_filters:        # Filters applied before forwarding to LLM
104      - input_guards
105    output_filters:       # Filters applied to LLM responses before returning to client
106      - input_guards
107
108  # Prompt listener for function calling (for prompt_targets)
109  - type: prompt
110    name: prompt_function_listener
111    address: 0.0.0.0
112    port: 10000
113
114# Reusable service endpoints
115endpoints:
116  app_server:
117    endpoint: 127.0.0.1:80
118    connect_timeout: 0.005s
119    protocol: http        # http or https
120
121  mistral_local:
122    endpoint: 127.0.0.1:8001
123
124  secure_service:
125    endpoint: api.example.com:443
126    protocol: https
127    http_host: api.example.com  # Override the Host header sent upstream
128
129# Optional top-level system prompt applied to all prompt_targets
130system_prompt: |
131  You are a helpful assistant. Always respond concisely and accurately.
132
133# Prompt targets for function calling and API orchestration
134prompt_targets:
135  - name: get_current_weather
136    description: Get current weather at a location.
137    parameters:
138      - name: location
139        description: The location to get the weather for
140        required: true
141        type: string
142        format: City, State
143      - name: days
144        description: the number of days for the request
145        required: true
146        type: int
147    endpoint:
148      name: app_server
149      path: /weather
150      http_method: POST
151    # Per-target system prompt (overrides top-level system_prompt for this target)
152    system_prompt: You are a weather expert. Provide accurate and concise weather information.
153    # auto_llm_dispatch_on_response: when true, the LLM is called again with the
154    # function response to produce a final natural-language answer for the user
155    auto_llm_dispatch_on_response: true
156
157# Rate limits - control token usage per model and request selector
158ratelimits:
159  - model: openai/gpt-4o
160    selector:
161      key: x-user-id       # HTTP header key used to identify the rate-limit subject
162      value: "*"           # Wildcard matches any value; use a specific string to target one
163    limit:
164      tokens: 100000       # Maximum tokens allowed in the given time unit
165      unit: hour           # Time unit: "minute", "hour", or "day"
166
167  - model: openai/gpt-4o-mini
168    selector:
169      key: x-org-id
170      value: acme-corp
171    limit:
172      tokens: 500000
173      unit: day
174
175# Global behavior overrides
176overrides:
177  # Threshold for routing a request to a prompt_target (0.0–1.0). Lower = more permissive.
178  prompt_target_intent_matching_threshold: 0.7
179  # Trim conversation history to fit within the model's context window
180  optimize_context_window: true
181  # Use Plano's agent orchestrator for multi-agent request routing
182  use_agent_orchestrator: true
183  # Connect timeout for upstream provider clusters (e.g., "5s", "10s"). Default: "5s"
184  upstream_connect_timeout: 10s
185  # Path to the trusted CA bundle for upstream TLS verification
186  upstream_tls_ca_path: /etc/ssl/certs/ca-certificates.crt
187  # Model used for intent-based LLM routing (must be listed in model_providers)
188  llm_routing_model: Arch-Router
189  # Model used for agent orchestration (must be listed in model_providers)
190  agent_orchestration_model: Plano-Orchestrator
191
192# State storage for multi-turn conversation history
193state_storage:
194  type: memory            # "memory" (in-process) or "postgres" (persistent)
195  # connection_string is required when type is postgres.
196  # Supports environment variable substitution: $VAR or ${VAR}
197  # connection_string: postgresql://user:$DB_PASS@localhost:5432/plano
198
199# Input guardrails applied globally to all incoming requests
200prompt_guards:
201  input_guards:
202    jailbreak:
203      on_exception:
204        message: "I'm sorry, I can't help with that request."
205
206# OpenTelemetry tracing configuration
207tracing:
208  # Random sampling percentage (1-100)
209  random_sampling: 100
210  # Include internal Plano spans in traces
211  trace_arch_internal: false
212  # gRPC endpoint for OpenTelemetry collector (e.g., Jaeger, Tempo)
213  opentracing_grpc_endpoint: http://localhost:4317
214  span_attributes:
215    # Propagate request headers whose names start with these prefixes as span attributes
216    header_prefixes:
217      - x-user-
218      - x-org-
219    # Static key/value pairs added to every span
220    static:
221      environment: production
222      service.team: platform