Configuration Reference
The following is a complete reference of the plano_config.yml that controls the behavior of a single instance of
the Plano gateway. This is where you enable capabilities like routing to upstream LLM providers, defining prompt_targets
where prompts get routed, applying guardrails, and enabling critical agent observability features.
# Plano Gateway configuration version
version: v0.3.0

# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
agents:
  - id: weather_agent  # Example agent for weather
    url: http://localhost:10510

  - id: flight_agent  # Example agent for flights
    url: http://localhost:10520

# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
filters:
  - id: input_guards  # Example filter for input validation
    url: http://localhost:10500
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: input_guards (default - same as filter id)

# LLM provider configurations with API keys and model routing
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    default: true

  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY

  - model: anthropic/claude-sonnet-4-0
    access_key: $ANTHROPIC_API_KEY

  - model: mistral/ministral-3b-latest
    access_key: $MISTRAL_API_KEY

  # routing_preferences: tags a model with named capabilities so Plano's LLM router
  # can select the best model for each request based on intent. Requires the
  # Arch-Router model (or equivalent) to be configured in overrides.llm_routing_model.
  # Each preference has a name (short label) and a description (used for intent matching).
  - model: openai/gpt-4o
    name: gpt-4o-coding  # Optional friendly name to distinguish multiple entries for same model
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code generation
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
      - name: code review
        description: reviewing, analyzing, and suggesting improvements to existing code

  - model: anthropic/claude-sonnet-4-0
    name: claude-sonnet-reasoning
    access_key: $ANTHROPIC_API_KEY
    routing_preferences:
      - name: reasoning
        description: complex multi-step reasoning, math, logic puzzles, and analytical tasks

  # passthrough_auth: forwards the client's Authorization header upstream instead of
  # using the configured access_key. Useful for LiteLLM or similar proxy setups.
  - model: openai/gpt-4o-litellm
    base_url: https://litellm.example.com
    passthrough_auth: true

  # provider_interface: specifies the API format when the provider doesn't match
  # the default inferred from the model name. Supported: openai, claude, gemini,
  # mistral, groq, deepseek, plano
  - model: groq/llama-3.3-70b-versatile
    access_key: $GROQ_API_KEY
    provider_interface: groq

  # Custom/self-hosted endpoint with explicit http_host override
  - model: openai/llama-3.3-70b
    base_url: https://api.custom-provider.com
    http_host: api.custom-provider.com
    access_key: $CUSTOM_API_KEY

# Model aliases - use friendly names instead of full provider model names
model_aliases:
  fast-llm:
    target: gpt-4o-mini

  smart-llm:
    target: gpt-4o

# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
listeners:
  # Agent listener for routing requests to multiple agents
  - type: agent
    name: travel_booking_service
    port: 8001
    router: plano_orchestrator_v1
    address: 0.0.0.0
    agents:
      - id: rag_agent
        description: virtual assistant for retrieval augmented generation tasks
    # NOTE(review): input_filters placed at listener level (matching the model
    # listener below) — confirm against the schema whether it belongs per-agent
    input_filters:
      - input_guards

  # Model listener for direct LLM access
  - type: model
    name: model_1
    address: 0.0.0.0
    port: 12000
    timeout: 30s  # Request timeout (e.g. "30s", "60s")
    max_retries: 3  # Number of retries on upstream failure
    input_filters:  # Filters applied before forwarding to LLM
      - input_guards
    output_filters:  # Filters applied to LLM responses before returning to client
      - input_guards

  # Prompt listener for function calling (for prompt_targets)
  - type: prompt
    name: prompt_function_listener
    address: 0.0.0.0
    port: 10000

# Reusable service endpoints
endpoints:
  app_server:
    endpoint: 127.0.0.1:80
    connect_timeout: 0.005s
    protocol: http  # http or https

  mistral_local:
    endpoint: 127.0.0.1:8001

  secure_service:
    endpoint: api.example.com:443
    protocol: https
    http_host: api.example.com  # Override the Host header sent upstream

# Optional top-level system prompt applied to all prompt_targets
system_prompt: |
  You are a helpful assistant. Always respond concisely and accurately.

# Prompt targets for function calling and API orchestration
prompt_targets:
  - name: get_current_weather
    description: Get current weather at a location.
    parameters:
      - name: location
        description: The location to get the weather for
        required: true
        type: string
        format: City, State
      - name: days
        description: the number of days for the request
        required: true
        type: int
    endpoint:
      name: app_server
      path: /weather
      http_method: POST
    # Per-target system prompt (overrides top-level system_prompt for this target)
    system_prompt: You are a weather expert. Provide accurate and concise weather information.
    # auto_llm_dispatch_on_response: when true, the LLM is called again with the
    # function response to produce a final natural-language answer for the user
    auto_llm_dispatch_on_response: true

# Rate limits - control token usage per model and request selector
ratelimits:
  - model: openai/gpt-4o
    selector:
      key: x-user-id  # HTTP header key used to identify the rate-limit subject
      value: "*"  # Wildcard matches any value; use a specific string to target one
    limit:
      tokens: 100000  # Maximum tokens allowed in the given time unit
      unit: hour  # Time unit: "minute", "hour", or "day"

  - model: openai/gpt-4o-mini
    selector:
      key: x-org-id
      value: acme-corp
    limit:
      tokens: 500000
      unit: day

# Global behavior overrides
overrides:
  # Threshold for routing a request to a prompt_target (0.0–1.0). Lower = more permissive.
  prompt_target_intent_matching_threshold: 0.7
  # Trim conversation history to fit within the model's context window
  optimize_context_window: true
  # Use Plano's agent orchestrator for multi-agent request routing
  use_agent_orchestrator: true
  # Connect timeout for upstream provider clusters (e.g., "5s", "10s"). Default: "5s"
  upstream_connect_timeout: 10s
  # Path to the trusted CA bundle for upstream TLS verification
  upstream_tls_ca_path: /etc/ssl/certs/ca-certificates.crt
  # Model used for intent-based LLM routing (must be listed in model_providers)
  llm_routing_model: Arch-Router
  # Model used for agent orchestration (must be listed in model_providers)
  agent_orchestration_model: Plano-Orchestrator

# State storage for multi-turn conversation history
state_storage:
  type: memory  # "memory" (in-process) or "postgres" (persistent)
  # connection_string is required when type is postgres.
  # Supports environment variable substitution: $VAR or ${VAR}
  # connection_string: postgresql://user:$DB_PASS@localhost:5432/plano

# Input guardrails applied globally to all incoming requests
prompt_guards:
  input_guards:
    jailbreak:
      on_exception:
        message: "I'm sorry, I can't help with that request."

# OpenTelemetry tracing configuration
tracing:
  # Random sampling percentage (1-100)
  random_sampling: 100
  # Include internal Plano spans in traces
  trace_arch_internal: false
  # gRPC endpoint for OpenTelemetry collector (e.g., Jaeger, Tempo)
  opentracing_grpc_endpoint: http://localhost:4317
  span_attributes:
    # Propagate request headers whose names start with these prefixes as span attributes
    header_prefixes:
      - x-user-
      - x-org-
    # Static key/value pairs added to every span
    static:
      environment: production
      service.team: platform