Configuration Reference
The following is a complete reference of the plano_config.yml that controls the behavior of a single instance of
the Plano gateway. This is where you enable capabilities like routing to upstream LLM providers, defining prompt_targets
where prompts get routed to, applying guardrails, and enabling critical agent observability features.
# Plano Gateway configuration version
version: v0.4.0

# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
agents:
  - id: weather_agent  # Example agent for weather
    url: http://localhost:10510

  - id: flight_agent  # Example agent for flights
    url: http://localhost:10520

# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
filters:
  - id: input_guards  # Example filter for input validation
    url: http://localhost:10500
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: input_guards (default - same as filter id)

# LLM provider configurations with API keys and model routing
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    default: true

  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY

  - model: anthropic/claude-sonnet-4-0
    access_key: $ANTHROPIC_API_KEY

  - model: mistral/ministral-3b-latest
    access_key: $MISTRAL_API_KEY

  - model: groq/llama-3.3-70b-versatile
    access_key: $GROQ_API_KEY

  # passthrough_auth: forwards the client's Authorization header upstream instead of
  # using the configured access_key. Useful for LiteLLM or similar proxy setups.
  - model: openai/gpt-4o-litellm
    base_url: https://litellm.example.com
    passthrough_auth: true

  # Custom/self-hosted endpoint with explicit http_host override
  - model: openai/llama-3.3-70b
    base_url: https://api.custom-provider.com
    http_host: api.custom-provider.com
    access_key: $CUSTOM_API_KEY

# Model aliases - use friendly names instead of full provider model names
model_aliases:
  fast-llm:
    target: gpt-4o-mini

  smart-llm:
    target: gpt-4o

# routing_preferences: top-level list that tags named task categories with an
# ordered pool of candidate models. Plano's LLM router matches incoming requests
# against these descriptions and returns an ordered list of models; the client
# uses models[0] as primary and retries with models[1], models[2]... on 429/5xx.
# Requires overrides.llm_routing_model to point at Plano-Orchestrator (or equivalent).
# Each model in `models` must be declared in model_providers above.
# selection_policy is optional: {prefer: cheapest|fastest|none} lets the router
# reorder candidates using live cost/latency data from model_metrics_sources.
routing_preferences:
  - name: code generation
    description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
    models:
      - anthropic/claude-sonnet-4-0
      - openai/gpt-4o
      - groq/llama-3.3-70b-versatile
  - name: code review
    description: reviewing, analyzing, and suggesting improvements to existing code
    models:
      - anthropic/claude-sonnet-4-0
      - groq/llama-3.3-70b-versatile
    selection_policy:
      prefer: cheapest

# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
listeners:
  # Agent listener for routing requests to multiple agents
  - type: agent
    name: travel_booking_service
    port: 8001
    router: plano_orchestrator_v1
    address: 0.0.0.0
    agents:
      - id: rag_agent
        description: virtual assistant for retrieval augmented generation tasks
    input_filters:
      - input_guards

  # Model listener for direct LLM access
  - type: model
    name: model_1
    address: 0.0.0.0
    port: 12000
    timeout: 30s  # Request timeout (e.g. "30s", "60s")
    max_retries: 3  # Number of retries on upstream failure
    input_filters:  # Filters applied before forwarding to LLM
      - input_guards
    output_filters:  # Filters applied to LLM responses before returning to client
      - input_guards

  # Prompt listener for function calling (for prompt_targets)
  - type: prompt
    name: prompt_function_listener
    address: 0.0.0.0
    port: 10000

# Reusable service endpoints
endpoints:
  app_server:
    endpoint: 127.0.0.1:80
    connect_timeout: 0.005s
    protocol: http  # http or https

  mistral_local:
    endpoint: 127.0.0.1:8001

  secure_service:
    endpoint: api.example.com:443
    protocol: https
    http_host: api.example.com  # Override the Host header sent upstream

# Optional top-level system prompt applied to all prompt_targets
system_prompt: |
  You are a helpful assistant. Always respond concisely and accurately.

# Prompt targets for function calling and API orchestration
prompt_targets:
  - name: get_current_weather
    description: Get current weather at a location.
    parameters:
      - name: location
        description: The location to get the weather for
        required: true
        type: string
        format: City, State
      - name: days
        description: the number of days for the request
        required: true
        type: int
    endpoint:
      name: app_server
      path: /weather
      http_method: POST
    # Per-target system prompt (overrides top-level system_prompt for this target)
    system_prompt: You are a weather expert. Provide accurate and concise weather information.
    # auto_llm_dispatch_on_response: when true, the LLM is called again with the
    # function response to produce a final natural-language answer for the user
    auto_llm_dispatch_on_response: true

# Rate limits - control token usage per model and request selector
ratelimits:
  - model: openai/gpt-4o
    selector:
      key: x-user-id  # HTTP header key used to identify the rate-limit subject
      value: "*"  # Wildcard matches any value; use a specific string to target one
    limit:
      tokens: 100000  # Maximum tokens allowed in the given time unit
      unit: hour  # Time unit: "minute", "hour", or "day"

  - model: openai/gpt-4o-mini
    selector:
      key: x-org-id
      value: acme-corp
    limit:
      tokens: 500000
      unit: day

# Global behavior overrides
overrides:
  # Threshold for routing a request to a prompt_target (0.0–1.0). Lower = more permissive.
  prompt_target_intent_matching_threshold: 0.7
  # Trim conversation history to fit within the model's context window
  optimize_context_window: true
  # Use Plano's agent orchestrator for multi-agent request routing
  use_agent_orchestrator: false
  # Connect timeout for upstream provider clusters (e.g., "5s", "10s"). Default: "5s"
  upstream_connect_timeout: 10s
  # Path to the trusted CA bundle for upstream TLS verification
  upstream_tls_ca_path: /etc/ssl/certs/ca-certificates.crt
  # Model used for intent-based LLM routing (must be listed in model_providers)
  llm_routing_model: Plano-Orchestrator
  # Model used for agent orchestration (must be listed in model_providers)
  agent_orchestration_model: Plano-Orchestrator
  # Disable agentic signal analysis (frustration, repetition, escalation, etc.)
  # on LLM responses to save CPU. Default: false.
  disable_signals: false

# Model affinity — pin routing decisions for agentic loops
routing:
  session_ttl_seconds: 600  # How long a pinned session lasts (default: 600s / 10 min)
  session_max_entries: 10000  # Max cached sessions before eviction (upper limit: 10000)
  # session_cache controls the backend used to store affinity state.
  # "memory" (default) is in-process and works for single-instance deployments.
  # "redis" shares state across replicas — required for multi-replica / Kubernetes setups.
  session_cache:
    type: memory  # "memory" (default) or "redis"
    # url is required when type is "redis". Supports redis:// and rediss:// (TLS).
    # url: redis://localhost:6379
    # tenant_header: x-org-id  # optional; when set, keys are scoped as plano:affinity:{tenant_id}:{session_id}

# State storage for multi-turn conversation history
state_storage:
  type: memory  # "memory" (in-process) or "postgres" (persistent)
  # connection_string is required when type is postgres.
  # Supports environment variable substitution: $VAR or ${VAR}
  # connection_string: postgresql://user:$DB_PASS@localhost:5432/plano

# Input guardrails applied globally to all incoming requests
prompt_guards:
  input_guards:
    jailbreak:
      on_exception:
        message: "I'm sorry, I can't help with that request."

# OpenTelemetry tracing configuration
tracing:
  # Random sampling percentage (1-100)
  random_sampling: 100
  # Include internal Plano spans in traces
  trace_arch_internal: false
  # gRPC endpoint for OpenTelemetry collector (e.g., Jaeger, Tempo)
  opentracing_grpc_endpoint: http://localhost:4317
  span_attributes:
    # Propagate request headers whose names start with these prefixes as span attributes
    header_prefixes:
      - x-user-
      - x-org-
    # Static key/value pairs added to every span
    static:
      environment: production
      service.team: platform