plano/config/plano_config_schema.yaml
Troy Mitchell 18dbbd25f7 config: add retry_policy to plano_config_schema.yaml
Add JSON schema definitions for retry policy configuration including
RetryPolicy, BackoffConfig, RetryAfterConfig, HighLatencyConfig,
LatencyTriggerConfig, RetryStrategy, StatusCodeEntry, and all
associated enums.

Signed-off-by: Troy Mitchell <i@troy-y.org>
2026-04-28 15:26:14 +08:00

977 lines
31 KiB
YAML

# This document is itself a JSON Schema, written against the draft-07 dialect.
$schema: 'http://json-schema.org/draft-07/schema#'
# A configuration document must be a mapping at its root.
type: object
# Definitions of the keys that may appear at the root of a configuration.
properties:
# Configuration format version. Only the exact strings listed here validate.
# Values are quoted so no YAML parser can mistake a version for a number.
version:
  type: string
  enum:
    - 'v0.1'
    - 'v0.1.0'
    - '0.1-beta'
    - '0.2.0'
    - 'v0.3.0'
    - 'v0.4.0'
# List of agent definitions. Each entry is a closed object that must carry
# both an `id` and a `url`.
agents:
  type: array
  items:
    type: object
    properties:
      id:
        type: string
      url:
        type: string
    # No keys other than the two above are accepted.
    additionalProperties: false
    required:
      - id
      - url
# List of filter definitions. `id` and `url` are mandatory; `type`,
# `transport` and `tool` are optional and, where enumerated, restricted to
# the listed values.
filters:
  type: array
  items:
    type: object
    properties:
      id:
        type: string
      url:
        type: string
      type:
        type: string
        enum:
          - mcp
          - http
      transport:
        type: string
        enum:
          - streamable-http
      tool:
        type: string
    additionalProperties: false
    required:
      - id
      - url
# Listener configuration. Exactly one of two shapes must match:
#   1. a list of listener objects (current format), or
#   2. a mapping with `ingress_traffic` / `egress_traffic` (deprecated).
listeners:
  oneOf:
    # Current format: a list of listeners.
    # `additionalProperties` is intentionally not set on this branch: the
    # keyword only constrains object instances and is ignored for arrays.
    - type: array
      items:
        type: object
        properties:
          name:
            type: string
          port:
            type: integer
          address:
            type: string
          timeout:
            type: string
          router:
            type: string
            enum:
              - plano_orchestrator_v1
          max_retries:
            type: integer
          type:
            type: string
            enum:
              - model
              - prompt
              - agent
          # Agents attached to this listener; each needs an id and a description.
          agents:
            type: array
            items:
              type: object
              properties:
                id:
                  type: string
                description:
                  type: string
                default:
                  type: boolean
                input_filters:
                  type: array
                  items:
                    type: string
              additionalProperties: false
              required:
                - id
                - description
          input_filters:
            type: array
            items:
              type: string
          output_filters:
            type: array
            items:
              type: string
        # Each listener object is closed and must declare its type and name.
        additionalProperties: false
        required:
          - type
          - name
    - type: object  # deprecated legacy format, use list format instead
      additionalProperties: false
      properties:
        ingress_traffic:
          type: object
          properties:
            address:
              type: string
            port:
              type: integer
            message_format:
              type: string
              enum:
                - openai
            timeout:
              type: string
          additionalProperties: false
        egress_traffic:
          type: object
          properties:
            address:
              type: string
            port:
              type: integer
            message_format:
              type: string
              enum:
                - openai
            timeout:
              type: string
          additionalProperties: false
# Named endpoints. Keys that start with a letter and continue with letters,
# digits or underscores are validated as endpoint objects; each such object
# must at least provide `endpoint`.
endpoints:
  type: object
  patternProperties:
    '^[a-zA-Z][a-zA-Z0-9_]*$':
      type: object
      properties:
        endpoint:
          type: string
          # This pattern accepts any string.
          pattern: '^.*$'
        connect_timeout:
          type: string
        protocol:
          type: string
          enum:
            - http
            - https
        http_host:
          type: string
      additionalProperties: false
      required:
        - endpoint
# Upstream model providers. Every entry is a closed object whose only
# mandatory key is `model`.
model_providers:
  type: array
  items:
    type: object
    properties:
      name:
        type: string
      access_key:
        type: string
      model:
        type: string
      default:
        type: boolean
      base_url:
        type: string
      passthrough_auth:
        type: boolean
        description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
      http_host:
        type: string
      provider_interface:
        type: string
        enum:
          - plano
          - claude
          - deepseek
          - groq
          - mistral
          - openai
          - xiaomi
          - gemini
          - chatgpt
          - digitalocean
          - vercel
          - openrouter
      # Free-form header map: any key is allowed, every value must be a string.
      headers:
        type: object
        additionalProperties:
          type: string
        description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
      routing_preferences:
        type: array
        description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
        items:
          type: object
          properties:
            name:
              type: string
            description:
              type: string
          additionalProperties: false
          required:
            - name
            - description
      retry_policy:
        type: object
        description: "Retry policy configuration. When not specified, no retry logic is enabled."
        properties:
          fallback_models:
            type: array
            description: "Ordered list of model identifiers to fallback to before using Provider_List."
            items:
              type: string
          default_strategy:
            type: string
            description: "Default retry strategy for unconfigured status codes. Default: different_provider."
            enum:
              - same_model
              - same_provider
              - different_provider
          default_max_attempts:
            type: integer
            description: "Default max retry attempts for unconfigured status codes. Default: 2."
            minimum: 0
          on_status_codes:
            type: array
            description: "Per-status-code retry configuration."
            items:
              type: object
              properties:
                # Each element is either an integer in 100..599 or a string.
                # NOTE(review): the string form carries no `pattern`, so the
                # 'start-end' format is not checked by this schema.
                codes:
                  type: array
                  description: "List of status codes as integers or range strings (e.g. '502-504')."
                  items:
                    anyOf:
                      - type: integer
                        minimum: 100
                        maximum: 599
                      - type: string
                        description: "Range string in 'start-end' format (e.g. '502-504')."
                strategy:
                  type: string
                  description: "Retry strategy for these status codes."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts for these status codes."
                  minimum: 0
              additionalProperties: false
              required:
                - codes
                - strategy
                - max_attempts
          on_timeout:
            type: object
            description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
            properties:
              strategy:
                type: string
                description: "Retry strategy for timeout errors."
                enum:
                  - same_model
                  - same_provider
                  - different_provider
              max_attempts:
                type: integer
                description: "Max retry attempts for timeout errors."
                minimum: 1
            additionalProperties: false
            required:
              - strategy
              - max_attempts
          on_high_latency:
            type: object
            description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
            properties:
              threshold_ms:
                type: integer
                description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
                minimum: 1
              measure:
                type: string
                description: "What latency metric to measure. Default: ttfb."
                enum:
                  - ttfb
                  - total
              strategy:
                type: string
                description: "Retry strategy when latency threshold is exceeded."
                enum:
                  - same_model
                  - same_provider
                  - different_provider
              max_attempts:
                type: integer
                description: "Max retry attempts when latency threshold is exceeded."
                minimum: 1
              block_duration_seconds:
                type: integer
                description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
                minimum: 1
              scope:
                type: string
                description: "What to block: model-level or provider-level. Default: model."
                enum:
                  - model
                  - provider
              apply_to:
                type: string
                description: "Blocking scope: global or request-scoped. Default: global."
                enum:
                  - global
                  - request
              min_triggers:
                type: integer
                description: "Number of High_Latency_Events required before creating a block. Default: 1."
                minimum: 1
              trigger_window_seconds:
                type: integer
                description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
                minimum: 1
            additionalProperties: false
            required:
              - threshold_ms
              - strategy
              - max_attempts
              - block_duration_seconds
            # Enforce the rule stated in the trigger_window_seconds description:
            # once min_triggers is present and at least 2, a window is mandatory.
            allOf:
              - if:
                  properties:
                    min_triggers:
                      minimum: 2
                  required:
                    - min_triggers
                then:
                  required:
                    - trigger_window_seconds
          backoff:
            type: object
            description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
            properties:
              apply_to:
                type: string
                description: "REQUIRED. Determines when backoff delays are applied."
                enum:
                  - same_model
                  - same_provider
                  - global
              base_ms:
                type: integer
                description: "Base delay in milliseconds for exponential backoff. Default: 100."
                minimum: 1
              max_ms:
                type: integer
                description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
                minimum: 1
              jitter:
                type: boolean
                description: "Add random jitter to prevent thundering herd. Default: true."
            additionalProperties: false
            required:
              - apply_to
          retry_after_handling:
            type: object
            description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
            properties:
              scope:
                type: string
                description: "What to block: model-level or provider-level. Default: model."
                enum:
                  - model
                  - provider
              apply_to:
                type: string
                description: "Blocking scope: request-scoped or global. Default: global."
                enum:
                  - request
                  - global
              max_retry_after_seconds:
                type: integer
                description: "Maximum Retry-After value honored in seconds. Default: 300."
                minimum: 1
            additionalProperties: false
          max_retry_duration_ms:
            type: integer
            description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
            minimum: 0
        additionalProperties: false
    additionalProperties: false
    required:
      - model
# Legacy spelling of the provider list. Entries follow the same shape as
# `model_providers`: a closed object whose only mandatory key is `model`.
llm_providers:  # deprecated for legacy support, use model_providers instead
  type: array
  items:
    type: object
    properties:
      name:
        type: string
      access_key:
        type: string
      model:
        type: string
      default:
        type: boolean
      base_url:
        type: string
      passthrough_auth:
        type: boolean
        description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
      http_host:
        type: string
      provider_interface:
        type: string
        enum:
          - plano
          - claude
          - deepseek
          - groq
          - mistral
          - openai
          - xiaomi
          - gemini
          - chatgpt
          - digitalocean
          - vercel
          - openrouter
      # Free-form header map: any key is allowed, every value must be a string.
      headers:
        type: object
        additionalProperties:
          type: string
        description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
      routing_preferences:
        type: array
        description: "[DEPRECATED] Inline routing_preferences under an llm_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
        items:
          type: object
          properties:
            name:
              type: string
            description:
              type: string
          additionalProperties: false
          required:
            - name
            - description
      retry_policy:
        type: object
        description: "Retry policy configuration. When not specified, no retry logic is enabled."
        properties:
          fallback_models:
            type: array
            description: "Ordered list of model identifiers to fallback to before using Provider_List."
            items:
              type: string
          default_strategy:
            type: string
            description: "Default retry strategy for unconfigured status codes. Default: different_provider."
            enum:
              - same_model
              - same_provider
              - different_provider
          default_max_attempts:
            type: integer
            description: "Default max retry attempts for unconfigured status codes. Default: 2."
            minimum: 0
          on_status_codes:
            type: array
            description: "Per-status-code retry configuration."
            items:
              type: object
              properties:
                # Each element is either an integer in 100..599 or a string.
                # NOTE(review): the string form carries no `pattern`, so the
                # 'start-end' format is not checked by this schema.
                codes:
                  type: array
                  description: "List of status codes as integers or range strings (e.g. '502-504')."
                  items:
                    anyOf:
                      - type: integer
                        minimum: 100
                        maximum: 599
                      - type: string
                        description: "Range string in 'start-end' format (e.g. '502-504')."
                strategy:
                  type: string
                  description: "Retry strategy for these status codes."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts for these status codes."
                  minimum: 0
              additionalProperties: false
              required:
                - codes
                - strategy
                - max_attempts
          on_timeout:
            type: object
            description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
            properties:
              strategy:
                type: string
                description: "Retry strategy for timeout errors."
                enum:
                  - same_model
                  - same_provider
                  - different_provider
              max_attempts:
                type: integer
                description: "Max retry attempts for timeout errors."
                minimum: 1
            additionalProperties: false
            required:
              - strategy
              - max_attempts
          on_high_latency:
            type: object
            description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
            properties:
              threshold_ms:
                type: integer
                description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
                minimum: 1
              measure:
                type: string
                description: "What latency metric to measure. Default: ttfb."
                enum:
                  - ttfb
                  - total
              strategy:
                type: string
                description: "Retry strategy when latency threshold is exceeded."
                enum:
                  - same_model
                  - same_provider
                  - different_provider
              max_attempts:
                type: integer
                description: "Max retry attempts when latency threshold is exceeded."
                minimum: 1
              block_duration_seconds:
                type: integer
                description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
                minimum: 1
              scope:
                type: string
                description: "What to block: model-level or provider-level. Default: model."
                enum:
                  - model
                  - provider
              apply_to:
                type: string
                description: "Blocking scope: global or request-scoped. Default: global."
                enum:
                  - global
                  - request
              min_triggers:
                type: integer
                description: "Number of High_Latency_Events required before creating a block. Default: 1."
                minimum: 1
              trigger_window_seconds:
                type: integer
                description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
                minimum: 1
            additionalProperties: false
            required:
              - threshold_ms
              - strategy
              - max_attempts
              - block_duration_seconds
            # Enforce the rule stated in the trigger_window_seconds description:
            # once min_triggers is present and at least 2, a window is mandatory.
            allOf:
              - if:
                  properties:
                    min_triggers:
                      minimum: 2
                  required:
                    - min_triggers
                then:
                  required:
                    - trigger_window_seconds
          backoff:
            type: object
            description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
            properties:
              apply_to:
                type: string
                description: "REQUIRED. Determines when backoff delays are applied."
                enum:
                  - same_model
                  - same_provider
                  - global
              base_ms:
                type: integer
                description: "Base delay in milliseconds for exponential backoff. Default: 100."
                minimum: 1
              max_ms:
                type: integer
                description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
                minimum: 1
              jitter:
                type: boolean
                description: "Add random jitter to prevent thundering herd. Default: true."
            additionalProperties: false
            required:
              - apply_to
          retry_after_handling:
            type: object
            description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
            properties:
              scope:
                type: string
                description: "What to block: model-level or provider-level. Default: model."
                enum:
                  - model
                  - provider
              apply_to:
                type: string
                description: "Blocking scope: request-scoped or global. Default: global."
                enum:
                  - request
                  - global
              max_retry_after_seconds:
                type: integer
                description: "Maximum Retry-After value honored in seconds. Default: 300."
                minimum: 1
            additionalProperties: false
          max_retry_duration_ms:
            type: integer
            description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
            minimum: 0
        additionalProperties: false
    additionalProperties: false
    required:
      - model
# Alias map. The key pattern matches every key, so each value must be a
# closed object holding exactly one mandatory string, `target`.
model_aliases:
  type: object
  patternProperties:
    '^.*$':
      type: object
      properties:
        target:
          type: string
      additionalProperties: false
      required:
        - target
# Optional tuning knobs. None of these keys is required, and this object does
# not set `additionalProperties`, so unlisted keys are not rejected here.
overrides:
  type: object
  properties:
    prompt_target_intent_matching_threshold:
      type: number
    optimize_context_window:
      type: boolean
    use_agent_orchestrator:
      type: boolean
    disable_signals:
      type: boolean
      description: "Disable agentic signal analysis (frustration, repetition, escalation, etc.) on LLM responses to save CPU. Default false."
    upstream_connect_timeout:
      type: string
      description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
    upstream_tls_ca_path:
      type: string
      description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'."
    llm_routing_model:
      type: string
      description: "Model name for the LLM router (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
    agent_orchestration_model:
      type: string
      description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
    orchestrator_model_context_length:
      type: integer
      description: "Maximum token length for the orchestrator/routing model context window. Default is 8192."
# Optional root-level key; when present its value must be a string.
system_prompt:
  type: string
# Prompt target definitions. Each target is a closed object that must carry
# a `name` and a `description`; parameters and endpoint are optional.
prompt_targets:
  type: array
  items:
    type: object
    properties:
      name:
        type: string
      default:
        type: boolean
      description:
        type: string
      auto_llm_dispatch_on_response:
        type: boolean
      parameters:
        type: array
        items:
          type: object
          # Unknown parameter keys are rejected by the single
          # `additionalProperties: false` that follows this property list;
          # no such keyword belongs among the property definitions themselves.
          properties:
            name:
              type: string
            required:
              type: boolean
            default:
              anyOf:
                - type: string
                - type: integer
                - type: boolean
            description:
              type: string
            type:
              type: string
            # Here `enum` is the name of a config key whose value is a list of
            # scalars, not the JSON Schema `enum` keyword.
            enum:
              type: array
              items:
                anyOf:
                  - type: string
                  - type: integer
                  - type: boolean
            in_path:
              type: boolean
            format:
              type: string
          additionalProperties: false
          required:
            - name
            - description
            - type
      endpoint:
        type: object
        properties:
          name:
            type: string
          path:
            type: string
          http_method:
            type: string
            enum:
              - GET
              - POST
          # Free-form header map: any key, string values only.
          http_headers:
            type: object
            additionalProperties:
              type: string
        additionalProperties: false
        required:
          - name
          - path
      system_prompt:
        type: string
    additionalProperties: false
    required:
      - name
      - description
# Rate-limit rules. Every rule is a closed object and must specify all three
# of `model`, `selector` and `limit`.
ratelimits:
  type: array
  items:
    type: object
    properties:
      model:
        type: string
      # A key/value pair, both strings, both mandatory.
      selector:
        type: object
        properties:
          key:
            type: string
          value:
            type: string
        additionalProperties: false
        required:
          - key
          - value
      # An integer token count together with a unit string, both mandatory.
      limit:
        type: object
        properties:
          tokens:
            type: integer
          unit:
            type: string
        additionalProperties: false
        required:
          - tokens
          - unit
    additionalProperties: false
    required:
      - model
      - selector
      - limit
# Tracing settings. The object is closed and none of its keys is required.
tracing:
  type: object
  properties:
    random_sampling:
      type: integer
    trace_arch_internal:
      type: boolean
    opentracing_grpc_endpoint:
      type: string
    span_attributes:
      type: object
      properties:
        header_prefixes:
          type: array
          items:
            type: string
        # Free-form map: any key, string values only.
        static:
          type: object
          additionalProperties:
            type: string
      additionalProperties: false
  additionalProperties: false
# Optional root-level key restricted to one of two string values.
mode:
  type: string
  enum:
    - llm
    - prompt
# Routing settings. The object is closed and none of its keys is required.
routing:
  type: object
  properties:
    llm_provider:
      type: string
    model:
      type: string
    session_ttl_seconds:
      type: integer
      minimum: 1
      description: "TTL in seconds for session-pinned routing cache entries. Default 600 (10 minutes)."
    session_max_entries:
      type: integer
      minimum: 1
      maximum: 10000
      description: "Maximum number of session-pinned routing cache entries. Default 10000."
    session_cache:
      type: object
      properties:
        type:
          type: string
          enum:
            - memory
            - redis
          default: memory
          description: 'Session cache backend. "memory" (default) is in-process; "redis" is shared across replicas.'
        url:
          type: string
          description: "Redis URL, e.g. redis://localhost:6379. Required when type is redis."
        tenant_header:
          type: string
          description: >
            Optional HTTP header name whose value is used as a tenant prefix in the cache key.
            When set, keys are scoped as plano:affinity:{tenant_id}:{session_id}.
      additionalProperties: false
      # Enforce the rule stated in the `url` description. `type` is listed
      # under `required` inside the condition so that an omitted type does not
      # trigger the URL requirement.
      allOf:
        - if:
            properties:
              type:
                const: redis
            required:
              - type
          then:
            required:
              - url
  additionalProperties: false
# State storage backend. `type` is always required; the object is closed.
state_storage:
  type: object
  properties:
    type:
      type: string
      enum:
        - memory
        - postgres
    connection_string:
      type: string
      description: "Required when type is postgres. Supports environment variable substitution using $VAR or ${VAR} syntax."
  additionalProperties: false
  required:
    - type
  # Conditional requirement: choosing the postgres backend makes
  # connection_string mandatory. For the memory backend the condition does
  # not match, so connection_string stays optional.
  allOf:
    - if:
        properties:
          type:
            const: postgres
      then:
        required:
          - connection_string
# Prompt guard settings. Whenever `input_guards` is given, the full chain
# input_guards -> jailbreak -> on_exception -> message must be present,
# because each level lists the next one under `required`.
prompt_guards:
  type: object
  properties:
    input_guards:
      type: object
      properties:
        jailbreak:
          type: object
          properties:
            on_exception:
              type: object
              properties:
                message:
                  type: string
              additionalProperties: false
              required:
                - message
          additionalProperties: false
          required:
            - on_exception
      additionalProperties: false
      required:
        - jailbreak
# Root-level routing preferences. Each entry is a closed object that must
# provide `name`, `description` and a non-empty `models` list.
routing_preferences:
  type: array
  items:
    type: object
    properties:
      name:
        type: string
      description:
        type: string
      models:
        type: array
        items:
          type: string
        # An empty model list is rejected.
        minItems: 1
      selection_policy:
        type: object
        properties:
          prefer:
            type: string
            enum:
              - cheapest
              - fastest
              - none
        additionalProperties: false
        required:
          - prefer
    additionalProperties: false
    required:
      - name
      - description
      - models
# Metrics sources. Each list element must match exactly one of two closed
# object shapes, told apart by the constant value of `type`.
model_metrics_sources:
  type: array
  items:
    oneOf:
      # Shape 1: type "cost"; `type` and `provider` are mandatory.
      - type: object
        properties:
          type:
            type: string
            const: cost
          provider:
            type: string
            enum:
              - digitalocean
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
          model_aliases:
            type: object
            description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
            additionalProperties:
              type: string
        required:
          - type
          - provider
        additionalProperties: false
      # Shape 2: type "latency"; additionally needs `url` and `query`.
      - type: object
        properties:
          type:
            type: string
            const: latency
          provider:
            type: string
            enum:
              - prometheus
          url:
            type: string
          query:
            type: string
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
        required:
          - type
          - provider
          - url
          - query
        additionalProperties: false
# Root-level constraints: keys not declared under `properties` are rejected.
additionalProperties: false
# Every configuration must provide at least these keys.
required:
- version
- listeners