# JSON Schema (draft-07) for the configuration document.
# A valid document is an object that declares at least `version` and
# `listeners`; unknown top-level keys are rejected.
$schema: 'http://json-schema.org/draft-07/schema#'
type: object
properties:
  # Schema version tag; must be one of the listed strings.
  version:
    type: string
    enum:
      - 'v0.1'
      - 'v0.1.0'
      - '0.1-beta'
      - '0.2.0'
      - 'v0.3.0'
      - 'v0.4.0'

  # Agent definitions; every entry needs an id and a url.
  agents:
    type: array
    items:
      type: object
      properties:
        id:
          type: string
        url:
          type: string
      additionalProperties: false
      required:
        - id
        - url

  # Filter definitions; id and url are required, the remaining keys are optional.
  filters:
    type: array
    items:
      type: object
      properties:
        id:
          type: string
        url:
          type: string
        type:
          type: string
          enum:
            - mcp
            - http
        transport:
          type: string
          enum:
            - streamable-http
        tool:
          type: string
      additionalProperties: false
      required:
        - id
        - url

  # Listeners are accepted in exactly one of two shapes: the list format or
  # the deprecated object format.
  listeners:
    oneOf:
      # List format: each listener needs a type and a name.
      - type: array
        items:
          type: object
          properties:
            name:
              type: string
            port:
              type: integer
            address:
              type: string
            timeout:
              type: string
            router:
              type: string
              enum:
                - plano_orchestrator_v1
            max_retries:
              type: integer
            type:
              type: string
              enum:
                - model
                - prompt
                - agent
            agents:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                  description:
                    type: string
                  default:
                    type: boolean
                  input_filters:
                    type: array
                    items:
                      type: string
                additionalProperties: false
                required:
                  - id
                  - description
            input_filters:
              type: array
              items:
                type: string
            output_filters:
              type: array
              items:
                type: string
          additionalProperties: false
          required:
            - type
            - name
      # deprecated legacy format, use list format instead
      - type: object
        additionalProperties: false
        properties:
          ingress_traffic:
            type: object
            properties:
              address:
                type: string
              port:
                type: integer
              message_format:
                type: string
                enum:
                  - openai
              timeout:
                type: string
            additionalProperties: false
          egress_traffic:
            type: object
            properties:
              address:
                type: string
              port:
                type: integer
              message_format:
                type: string
                enum:
                  - openai
              timeout:
                type: string
            additionalProperties: false

  # Named endpoints. Keys matching the pattern below (a letter followed by
  # letters, digits or underscores) are validated against the endpoint shape.
  endpoints:
    type: object
    patternProperties:
      '^[a-zA-Z][a-zA-Z0-9_]*$':
        type: object
        properties:
          endpoint:
            type: string
            pattern: '^.*$'
          connect_timeout:
            type: string
          protocol:
            type: string
            enum:
              - http
              - https
          http_host:
            type: string
        additionalProperties: false
        required:
          - endpoint

  # Model provider entries. Only `model` is required; unknown keys are rejected.
  model_providers:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        access_key:
          type: string
        model:
          type: string
        default:
          type: boolean
        base_url:
          type: string
        passthrough_auth:
          type: boolean
          description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
        http_host:
          type: string
        provider_interface:
          type: string
          enum:
            - plano
            - claude
            - deepseek
            - groq
            - mistral
            - openai
            - xiaomi
            - gemini
            - chatgpt
            - digitalocean
            - vercel
            - openrouter
        headers:
          type: object
          additionalProperties:
            type: string
          description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
        routing_preferences:
          type: array
          description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
          items:
            type: object
            properties:
              name:
                type: string
              description:
                type: string
            additionalProperties: false
            required:
              - name
              - description
        retry_policy:
          type: object
          description: "Retry policy configuration. When not specified, no retry logic is enabled."
          properties:
            fallback_models:
              type: array
              description: "Ordered list of model identifiers to fallback to before using Provider_List."
              items:
                type: string
            default_strategy:
              type: string
              description: "Default retry strategy for unconfigured status codes. Default: different_provider."
              enum:
                - same_model
                - same_provider
                - different_provider
            default_max_attempts:
              type: integer
              description: "Default max retry attempts for unconfigured status codes. Default: 2."
              minimum: 0
            on_status_codes:
              type: array
              description: "Per-status-code retry configuration."
              items:
                type: object
                properties:
                  codes:
                    type: array
                    description: "List of status codes as integers or range strings (e.g. '502-504')."
                    items:
                      anyOf:
                        - type: integer
                          minimum: 100
                          maximum: 599
                        - type: string
                          description: "Range string in 'start-end' format (e.g. '502-504')."
                  strategy:
                    type: string
                    description: "Retry strategy for these status codes."
                    enum:
                      - same_model
                      - same_provider
                      - different_provider
                  max_attempts:
                    type: integer
                    description: "Max retry attempts for these status codes."
                    minimum: 0
                additionalProperties: false
                required:
                  - codes
                  - strategy
                  - max_attempts
            on_timeout:
              type: object
              description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
              properties:
                strategy:
                  type: string
                  description: "Retry strategy for timeout errors."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts for timeout errors."
                  minimum: 1
              additionalProperties: false
              required:
                - strategy
                - max_attempts
            on_high_latency:
              type: object
              description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
              properties:
                threshold_ms:
                  type: integer
                  description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
                  minimum: 1
                measure:
                  type: string
                  description: "What latency metric to measure. Default: ttfb."
                  enum:
                    - ttfb
                    - total
                strategy:
                  type: string
                  description: "Retry strategy when latency threshold is exceeded."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts when latency threshold is exceeded."
                  minimum: 1
                block_duration_seconds:
                  type: integer
                  description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
                  minimum: 1
                scope:
                  type: string
                  description: "What to block: model-level or provider-level. Default: model."
                  enum:
                    - model
                    - provider
                apply_to:
                  type: string
                  description: "Blocking scope: global or request-scoped. Default: global."
                  enum:
                    - global
                    - request
                min_triggers:
                  type: integer
                  description: "Number of High_Latency_Events required before creating a block. Default: 1."
                  minimum: 1
                trigger_window_seconds:
                  type: integer
                  description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
                  minimum: 1
              additionalProperties: false
              required:
                - threshold_ms
                - strategy
                - max_attempts
                - block_duration_seconds
            backoff:
              type: object
              description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
              properties:
                apply_to:
                  type: string
                  description: "REQUIRED. Determines when backoff delays are applied."
                  enum:
                    - same_model
                    - same_provider
                    - global
                base_ms:
                  type: integer
                  description: "Base delay in milliseconds for exponential backoff. Default: 100."
                  minimum: 1
                max_ms:
                  type: integer
                  description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
                  minimum: 1
                jitter:
                  type: boolean
                  description: "Add random jitter to prevent thundering herd. Default: true."
              additionalProperties: false
              required:
                - apply_to
            retry_after_handling:
              type: object
              description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
              properties:
                scope:
                  type: string
                  description: "What to block: model-level or provider-level. Default: model."
                  enum:
                    - model
                    - provider
                apply_to:
                  type: string
                  description: "Blocking scope: request-scoped or global. Default: global."
                  enum:
                    - request
                    - global
                max_retry_after_seconds:
                  type: integer
                  description: "Maximum Retry-After value honored in seconds. Default: 300."
                  minimum: 1
              additionalProperties: false
            max_retry_duration_ms:
              type: integer
              description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
              minimum: 0
          additionalProperties: false
      additionalProperties: false
      required:
        - model

  # deprecated for legacy support, use model_providers instead
  llm_providers:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        access_key:
          type: string
        model:
          type: string
        default:
          type: boolean
        base_url:
          type: string
        passthrough_auth:
          type: boolean
          description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
        http_host:
          type: string
        provider_interface:
          type: string
          enum:
            - plano
            - claude
            - deepseek
            - groq
            - mistral
            - openai
            - xiaomi
            - gemini
            - chatgpt
            - digitalocean
            - vercel
            - openrouter
        headers:
          type: object
          additionalProperties:
            type: string
          description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
        routing_preferences:
          type: array
          description: "[DEPRECATED] Inline routing_preferences under an llm_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
          items:
            type: object
            properties:
              name:
                type: string
              description:
                type: string
            additionalProperties: false
            required:
              - name
              - description
        retry_policy:
          type: object
          description: "Retry policy configuration. When not specified, no retry logic is enabled."
          properties:
            fallback_models:
              type: array
              description: "Ordered list of model identifiers to fallback to before using Provider_List."
              items:
                type: string
            default_strategy:
              type: string
              description: "Default retry strategy for unconfigured status codes. Default: different_provider."
              enum:
                - same_model
                - same_provider
                - different_provider
            default_max_attempts:
              type: integer
              description: "Default max retry attempts for unconfigured status codes. Default: 2."
              minimum: 0
            on_status_codes:
              type: array
              description: "Per-status-code retry configuration."
              items:
                type: object
                properties:
                  codes:
                    type: array
                    description: "List of status codes as integers or range strings (e.g. '502-504')."
                    items:
                      anyOf:
                        - type: integer
                          minimum: 100
                          maximum: 599
                        - type: string
                          description: "Range string in 'start-end' format (e.g. '502-504')."
                  strategy:
                    type: string
                    description: "Retry strategy for these status codes."
                    enum:
                      - same_model
                      - same_provider
                      - different_provider
                  max_attempts:
                    type: integer
                    description: "Max retry attempts for these status codes."
                    minimum: 0
                additionalProperties: false
                required:
                  - codes
                  - strategy
                  - max_attempts
            on_timeout:
              type: object
              description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
              properties:
                strategy:
                  type: string
                  description: "Retry strategy for timeout errors."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts for timeout errors."
                  minimum: 1
              additionalProperties: false
              required:
                - strategy
                - max_attempts
            on_high_latency:
              type: object
              description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
              properties:
                threshold_ms:
                  type: integer
                  description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
                  minimum: 1
                measure:
                  type: string
                  description: "What latency metric to measure. Default: ttfb."
                  enum:
                    - ttfb
                    - total
                strategy:
                  type: string
                  description: "Retry strategy when latency threshold is exceeded."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts when latency threshold is exceeded."
                  minimum: 1
                block_duration_seconds:
                  type: integer
                  description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
                  minimum: 1
                scope:
                  type: string
                  description: "What to block: model-level or provider-level. Default: model."
                  enum:
                    - model
                    - provider
                apply_to:
                  type: string
                  description: "Blocking scope: global or request-scoped. Default: global."
                  enum:
                    - global
                    - request
                min_triggers:
                  type: integer
                  description: "Number of High_Latency_Events required before creating a block. Default: 1."
                  minimum: 1
                trigger_window_seconds:
                  type: integer
                  description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
                  minimum: 1
              additionalProperties: false
              required:
                - threshold_ms
                - strategy
                - max_attempts
                - block_duration_seconds
            backoff:
              type: object
              description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
              properties:
                apply_to:
                  type: string
                  description: "REQUIRED. Determines when backoff delays are applied."
                  enum:
                    - same_model
                    - same_provider
                    - global
                base_ms:
                  type: integer
                  description: "Base delay in milliseconds for exponential backoff. Default: 100."
                  minimum: 1
                max_ms:
                  type: integer
                  description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
                  minimum: 1
                jitter:
                  type: boolean
                  description: "Add random jitter to prevent thundering herd. Default: true."
              additionalProperties: false
              required:
                - apply_to
            retry_after_handling:
              type: object
              description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
              properties:
                scope:
                  type: string
                  description: "What to block: model-level or provider-level. Default: model."
                  enum:
                    - model
                    - provider
                apply_to:
                  type: string
                  description: "Blocking scope: request-scoped or global. Default: global."
                  enum:
                    - request
                    - global
                max_retry_after_seconds:
                  type: integer
                  description: "Maximum Retry-After value honored in seconds. Default: 300."
                  minimum: 1
              additionalProperties: false
            max_retry_duration_ms:
              type: integer
              description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
              minimum: 0
          additionalProperties: false
      additionalProperties: false
      required:
        - model

  # Alias map: each value is an object whose only (required) key is `target`.
  model_aliases:
    type: object
    patternProperties:
      '^.*$':
        type: object
        properties:
          target:
            type: string
        additionalProperties: false
        required:
          - target

  # Optional overrides; no key is required here.
  overrides:
    type: object
    properties:
      prompt_target_intent_matching_threshold:
        type: number
      optimize_context_window:
        type: boolean
      use_agent_orchestrator:
        type: boolean
      disable_signals:
        type: boolean
        description: "Disable agentic signal analysis (frustration, repetition, escalation, etc.) on LLM responses to save CPU. Default false."
      upstream_connect_timeout:
        type: string
        description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
      upstream_tls_ca_path:
        type: string
        description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'."
      llm_routing_model:
        type: string
        description: "Model name for the LLM router (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
      agent_orchestration_model:
        type: string
        description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
      orchestrator_model_context_length:
        type: integer
        description: "Maximum token length for the orchestrator/routing model context window. Default is 8192."

  system_prompt:
    type: string

  # Prompt targets; each entry needs a name and a description.
  prompt_targets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        default:
          type: boolean
        description:
          type: string
        auto_llm_dispatch_on_response:
          type: boolean
        # Each parameter needs name, description and type; unknown keys are
        # rejected by the item-level additionalProperties below.
        parameters:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              required:
                type: boolean
              default:
                anyOf:
                  - type: string
                  - type: integer
                  - type: boolean
              description:
                type: string
              type:
                type: string
              enum:
                type: array
                items:
                  anyOf:
                    - type: string
                    - type: integer
                    - type: boolean
              in_path:
                type: boolean
              format:
                type: string
            additionalProperties: false
            required:
              - name
              - description
              - type
        endpoint:
          type: object
          properties:
            name:
              type: string
            path:
              type: string
            http_method:
              type: string
              enum:
                - GET
                - POST
            http_headers:
              type: object
              additionalProperties:
                type: string
          additionalProperties: false
          required:
            - name
            - path
        system_prompt:
          type: string
      additionalProperties: false
      required:
        - name
        - description

  # Rate limits; every entry needs model, selector (key/value) and limit
  # (tokens/unit).
  ratelimits:
    type: array
    items:
      type: object
      properties:
        model:
          type: string
        selector:
          type: object
          properties:
            key:
              type: string
            value:
              type: string
          additionalProperties: false
          required:
            - key
            - value
        limit:
          type: object
          properties:
            tokens:
              type: integer
            unit:
              type: string
          additionalProperties: false
          required:
            - tokens
            - unit
      additionalProperties: false
      required:
        - model
        - selector
        - limit

  tracing:
    type: object
    properties:
      random_sampling:
        type: integer
      trace_arch_internal:
        type: boolean
      opentracing_grpc_endpoint:
        type: string
      span_attributes:
        type: object
        properties:
          header_prefixes:
            type: array
            items:
              type: string
          static:
            type: object
            additionalProperties:
              type: string
        additionalProperties: false
    additionalProperties: false

  mode:
    type: string
    enum:
      - llm
      - prompt

  routing:
    type: object
    properties:
      llm_provider:
        type: string
      model:
        type: string
      session_ttl_seconds:
        type: integer
        minimum: 1
        description: TTL in seconds for session-pinned routing cache entries. Default 600 (10 minutes).
      session_max_entries:
        type: integer
        minimum: 1
        maximum: 10000
        description: Maximum number of session-pinned routing cache entries. Default 10000.
      session_cache:
        type: object
        properties:
          type:
            type: string
            enum:
              - memory
              - redis
            default: memory
            description: Session cache backend. "memory" (default) is in-process; "redis" is shared across replicas.
          url:
            type: string
            description: Redis URL, e.g. redis://localhost:6379. Required when type is redis.
          tenant_header:
            type: string
            description: >
              Optional HTTP header name whose value is used as a tenant prefix in the cache key.
              When set, keys are scoped as plano:affinity:{tenant_id}:{session_id}.
        additionalProperties: false
    additionalProperties: false

  state_storage:
    type: object
    properties:
      type:
        type: string
        enum:
          - memory
          - postgres
      connection_string:
        type: string
        description: Required when type is postgres. Supports environment variable substitution using $VAR or ${VAR} syntax.
    additionalProperties: false
    required:
      - type
    # Note: connection_string is conditionally required based on type
    # If type is 'postgres', connection_string must be provided
    # If type is 'memory', connection_string is not needed
    allOf:
      - if:
          properties:
            type:
              const: postgres
        then:
          required:
            - connection_string

  # When present, input_guards must define jailbreak.on_exception.message.
  prompt_guards:
    type: object
    properties:
      input_guards:
        type: object
        properties:
          jailbreak:
            type: object
            properties:
              on_exception:
                type: object
                properties:
                  message:
                    type: string
                additionalProperties: false
                required:
                  - message
            additionalProperties: false
            required:
              - on_exception
        additionalProperties: false
        required:
          - jailbreak

  # Top-level routing preferences; each entry needs a name, a description and
  # a non-empty list of models.
  routing_preferences:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        description:
          type: string
        models:
          type: array
          items:
            type: string
          minItems: 1
        selection_policy:
          type: object
          properties:
            prefer:
              type: string
              enum:
                - cheapest
                - fastest
                - none
          additionalProperties: false
          required:
            - prefer
      additionalProperties: false
      required:
        - name
        - description
        - models

  # Metrics sources; each entry must match exactly one of the two shapes
  # below, distinguished by the `type` constant (cost or latency).
  model_metrics_sources:
    type: array
    items:
      oneOf:
        - type: object
          properties:
            type:
              type: string
              const: cost
            provider:
              type: string
              enum:
                - digitalocean
            refresh_interval:
              type: integer
              minimum: 1
              description: "Refresh interval in seconds"
            model_aliases:
              type: object
              description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
              additionalProperties:
                type: string
          required:
            - type
            - provider
          additionalProperties: false
        - type: object
          properties:
            type:
              type: string
              const: latency
            provider:
              type: string
              enum:
                - prometheus
            url:
              type: string
            query:
              type: string
            refresh_interval:
              type: integer
              minimum: 1
              description: "Refresh interval in seconds"
          required:
            - type
            - provider
            - url
            - query
          additionalProperties: false
additionalProperties: false
required:
  - version
  - listeners