# plano/config/plano_config_schema.yaml

# JSON Schema (draft-07) for the top-level configuration object.
$schema: "http://json-schema.org/draft-07/schema#"
type: object
properties:
  # Accepted configuration version identifiers.
  version:
    type: string
    # Quoted so that no YAML loader can re-type a version identifier.
    enum:
      - 'v0.1'
      - 'v0.1.0'
      - '0.1-beta'
      - '0.2.0'
      - 'v0.3.0'
      - 'v0.4.0'

  # Agents: each entry is an object with exactly the keys `id` and `url`.
  agents:
    type: array
    items:
      type: object
      additionalProperties: false
      required: [id, url]
      properties:
        id: {type: string}
        url: {type: string}
  # Filters: each entry needs `id` and `url`; `type`, `transport` and `tool`
  # are optional, and unknown keys are rejected.
  filters:
    type: array
    items:
      type: object
      additionalProperties: false
      required: [id, url]
      properties:
        id: {type: string}
        url: {type: string}
        type:
          type: string
          enum: [mcp, http]
        transport:
          type: string
          enum: [streamable-http]
        tool: {type: string}
  # Listeners accept either the list format or the deprecated single-object
  # format; exactly one of the two `oneOf` branches must match.
  listeners:
    oneOf:
      # List format: every listener must declare `type` and `name`, and
      # unknown keys are rejected.
      - type: array
        items:
          type: object
          properties:
            name:
              type: string
            port:
              type: integer
            address:
              type: string
            timeout:
              type: string
            router:
              type: string
              enum:
                - plano_orchestrator_v1
            max_retries:
              type: integer
            type:
              type: string
              enum:
                - model
                - prompt
                - agent
            # Per-listener agent entries; `id` and `description` are
            # mandatory for each one.
            agents:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                  description:
                    type: string
                  default:
                    type: boolean
                  input_filters:
                    type: array
                    items:
                      type: string
                additionalProperties: false
                required:
                  - id
                  - description
            input_filters:
              type: array
              items:
                type: string
            output_filters:
              type: array
              items:
                type: string
          additionalProperties: false
          required:
            - type
            - name
      - type: object # deprecated legacy format, use list format instead
        additionalProperties: false
        properties:
          ingress_traffic:
            type: object
            properties:
              address:
                type: string
              port:
                type: integer
              message_format:
                type: string
                enum:
                  - openai
              timeout:
                type: string
            additionalProperties: false
          egress_traffic:
            type: object
            properties:
              address:
                type: string
              port:
                type: integer
              message_format:
                type: string
                enum:
                  - openai
              timeout:
                type: string
            additionalProperties: false
  # Endpoints: a map keyed by endpoint name. Values stored under keys that
  # match the identifier pattern must be objects with at least `endpoint`.
  endpoints:
    type: object
    patternProperties:
      '^[a-zA-Z][a-zA-Z0-9_]*$':
        type: object
        additionalProperties: false
        required: [endpoint]
        properties:
          endpoint:
            type: string
            pattern: '^.*$'
          connect_timeout: {type: string}
          protocol:
            type: string
            enum: [http, https]
          http_host: {type: string}

  # Model providers: every entry must declare `model`, and unknown keys are
  # rejected.
  model_providers:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        access_key:
          type: string
        model:
          type: string
        default:
          type: boolean
        base_url:
          type: string
        passthrough_auth:
          type: boolean
          description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
        http_host:
          type: string
        provider_interface:
          type: string
          enum:
            - plano
            - claude
            - deepseek
            - groq
            - mistral
            - openai
            - xiaomi
            - gemini
            - chatgpt
            - digitalocean
            - vercel
            - openrouter
        headers:
          type: object
          additionalProperties:
            type: string
          description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
        routing_preferences:
          type: array
          description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
          items:
            type: object
            properties:
              name:
                type: string
              description:
                type: string
            # These keywords constrain each list entry, so they must live
            # inside `items`; on the array schema itself they are ignored.
            additionalProperties: false
            required:
              - name
              - description
        retry_policy:
          type: object
          description: "Retry policy configuration. When not specified, no retry logic is enabled."
          properties:
            fallback_models:
              type: array
              description: "Ordered list of model identifiers to fallback to before using Provider_List."
              items:
                type: string
            default_strategy:
              type: string
              description: "Default retry strategy for unconfigured status codes. Default: different_provider."
              enum:
                - same_model
                - same_provider
                - different_provider
            default_max_attempts:
              type: integer
              description: "Default max retry attempts for unconfigured status codes. Default: 2."
              minimum: 0
            on_status_codes:
              type: array
              description: "Per-status-code retry configuration."
              items:
                type: object
                properties:
                  codes:
                    type: array
                    description: "List of status codes as integers or range strings (e.g. '502-504')."
                    items:
                      # An entry is either a single integer status code or a
                      # string; the string's format is not validated here.
                      anyOf:
                        - type: integer
                          minimum: 100
                          maximum: 599
                        - type: string
                          description: "Range string in 'start-end' format (e.g. '502-504')."
                  strategy:
                    type: string
                    description: "Retry strategy for these status codes."
                    enum:
                      - same_model
                      - same_provider
                      - different_provider
                  max_attempts:
                    type: integer
                    description: "Max retry attempts for these status codes."
                    minimum: 0
                additionalProperties: false
                required:
                  - codes
                  - strategy
                  - max_attempts
            on_timeout:
              type: object
              description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
              properties:
                strategy:
                  type: string
                  description: "Retry strategy for timeout errors."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts for timeout errors."
                  minimum: 1
              additionalProperties: false
              required:
                - strategy
                - max_attempts
            on_high_latency:
              type: object
              description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
              properties:
                threshold_ms:
                  type: integer
                  description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
                  minimum: 1
                measure:
                  type: string
                  description: "What latency metric to measure. Default: ttfb."
                  enum:
                    - ttfb
                    - total
                strategy:
                  type: string
                  description: "Retry strategy when latency threshold is exceeded."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts when latency threshold is exceeded."
                  minimum: 1
                block_duration_seconds:
                  type: integer
                  description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
                  minimum: 1
                scope:
                  type: string
                  description: "What to block: model-level or provider-level. Default: model."
                  enum:
                    - model
                    - provider
                apply_to:
                  type: string
                  description: "Blocking scope: global or request-scoped. Default: global."
                  enum:
                    - global
                    - request
                min_triggers:
                  type: integer
                  description: "Number of High_Latency_Events required before creating a block. Default: 1."
                  minimum: 1
                # NOTE(review): the description says this is required when
                # min_triggers > 1, but the schema does not enforce it.
                trigger_window_seconds:
                  type: integer
                  description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
                  minimum: 1
              additionalProperties: false
              required:
                - threshold_ms
                - strategy
                - max_attempts
                - block_duration_seconds
            backoff:
              type: object
              description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
              properties:
                apply_to:
                  type: string
                  description: "REQUIRED. Determines when backoff delays are applied."
                  enum:
                    - same_model
                    - same_provider
                    - global
                base_ms:
                  type: integer
                  description: "Base delay in milliseconds for exponential backoff. Default: 100."
                  minimum: 1
                max_ms:
                  type: integer
                  description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
                  minimum: 1
                jitter:
                  type: boolean
                  description: "Add random jitter to prevent thundering herd. Default: true."
              additionalProperties: false
              required:
                - apply_to
            retry_after_handling:
              type: object
              description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
              properties:
                scope:
                  type: string
                  description: "What to block: model-level or provider-level. Default: model."
                  enum:
                    - model
                    - provider
                apply_to:
                  type: string
                  description: "Blocking scope: request-scoped or global. Default: global."
                  enum:
                    - request
                    - global
                max_retry_after_seconds:
                  type: integer
                  description: "Maximum Retry-After value honored in seconds. Default: 300."
                  minimum: 1
              additionalProperties: false
            max_retry_duration_ms:
              type: integer
              description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
              minimum: 0
          additionalProperties: false
      additionalProperties: false
      required:
        - model

  # Same entry shape as model_providers: every entry must declare `model`,
  # and unknown keys are rejected.
  llm_providers: # deprecated for legacy support, use model_providers instead
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        access_key:
          type: string
        model:
          type: string
        default:
          type: boolean
        base_url:
          type: string
        passthrough_auth:
          type: boolean
          description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
        http_host:
          type: string
        provider_interface:
          type: string
          enum:
            - plano
            - claude
            - deepseek
            - groq
            - mistral
            - openai
            - xiaomi
            - gemini
            - chatgpt
            - digitalocean
            - vercel
            - openrouter
        headers:
          type: object
          additionalProperties:
            type: string
          description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
        routing_preferences:
          type: array
          description: "[DEPRECATED] Inline routing_preferences under an llm_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
          items:
            type: object
            properties:
              name:
                type: string
              description:
                type: string
            # These keywords constrain each list entry, so they must live
            # inside `items`; on the array schema itself they are ignored.
            additionalProperties: false
            required:
              - name
              - description
        retry_policy:
          type: object
          description: "Retry policy configuration. When not specified, no retry logic is enabled."
          properties:
            fallback_models:
              type: array
              description: "Ordered list of model identifiers to fallback to before using Provider_List."
              items:
                type: string
            default_strategy:
              type: string
              description: "Default retry strategy for unconfigured status codes. Default: different_provider."
              enum:
                - same_model
                - same_provider
                - different_provider
            default_max_attempts:
              type: integer
              description: "Default max retry attempts for unconfigured status codes. Default: 2."
              minimum: 0
            on_status_codes:
              type: array
              description: "Per-status-code retry configuration."
              items:
                type: object
                properties:
                  codes:
                    type: array
                    description: "List of status codes as integers or range strings (e.g. '502-504')."
                    items:
                      # An entry is either a single integer status code or a
                      # string; the string's format is not validated here.
                      anyOf:
                        - type: integer
                          minimum: 100
                          maximum: 599
                        - type: string
                          description: "Range string in 'start-end' format (e.g. '502-504')."
                  strategy:
                    type: string
                    description: "Retry strategy for these status codes."
                    enum:
                      - same_model
                      - same_provider
                      - different_provider
                  max_attempts:
                    type: integer
                    description: "Max retry attempts for these status codes."
                    minimum: 0
                additionalProperties: false
                required:
                  - codes
                  - strategy
                  - max_attempts
            on_timeout:
              type: object
              description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
              properties:
                strategy:
                  type: string
                  description: "Retry strategy for timeout errors."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts for timeout errors."
                  minimum: 1
              additionalProperties: false
              required:
                - strategy
                - max_attempts
            on_high_latency:
              type: object
              description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
              properties:
                threshold_ms:
                  type: integer
                  description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
                  minimum: 1
                measure:
                  type: string
                  description: "What latency metric to measure. Default: ttfb."
                  enum:
                    - ttfb
                    - total
                strategy:
                  type: string
                  description: "Retry strategy when latency threshold is exceeded."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts when latency threshold is exceeded."
                  minimum: 1
                block_duration_seconds:
                  type: integer
                  description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
                  minimum: 1
                scope:
                  type: string
                  description: "What to block: model-level or provider-level. Default: model."
                  enum:
                    - model
                    - provider
                apply_to:
                  type: string
                  description: "Blocking scope: global or request-scoped. Default: global."
                  enum:
                    - global
                    - request
                min_triggers:
                  type: integer
                  description: "Number of High_Latency_Events required before creating a block. Default: 1."
                  minimum: 1
                # NOTE(review): the description says this is required when
                # min_triggers > 1, but the schema does not enforce it.
                trigger_window_seconds:
                  type: integer
                  description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
                  minimum: 1
              additionalProperties: false
              required:
                - threshold_ms
                - strategy
                - max_attempts
                - block_duration_seconds
            backoff:
              type: object
              description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
              properties:
                apply_to:
                  type: string
                  description: "REQUIRED. Determines when backoff delays are applied."
                  enum:
                    - same_model
                    - same_provider
                    - global
                base_ms:
                  type: integer
                  description: "Base delay in milliseconds for exponential backoff. Default: 100."
                  minimum: 1
                max_ms:
                  type: integer
                  description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
                  minimum: 1
                jitter:
                  type: boolean
                  description: "Add random jitter to prevent thundering herd. Default: true."
              additionalProperties: false
              required:
                - apply_to
            retry_after_handling:
              type: object
              description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
              properties:
                scope:
                  type: string
                  description: "What to block: model-level or provider-level. Default: model."
                  enum:
                    - model
                    - provider
                apply_to:
                  type: string
                  description: "Blocking scope: request-scoped or global. Default: global."
                  enum:
                    - request
                    - global
                max_retry_after_seconds:
                  type: integer
                  description: "Maximum Retry-After value honored in seconds. Default: 300."
                  minimum: 1
              additionalProperties: false
            max_retry_duration_ms:
              type: integer
              description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
              minimum: 0
          additionalProperties: false
      additionalProperties: false
      required:
        - model

  # Model aliases: a map whose values are objects holding only a `target`.
  model_aliases:
    type: object
    patternProperties:
      '^.*$':
        type: object
        additionalProperties: false
        required: [target]
        properties:
          target: {type: string}

  # Optional override settings; none of the keys below is required.
  overrides:
    type: object
    properties:
      prompt_target_intent_matching_threshold: {type: number}
      optimize_context_window: {type: boolean}
      use_agent_orchestrator: {type: boolean}
      disable_signals:
        description: "Disable agentic signal analysis (frustration, repetition, escalation, etc.) on LLM responses to save CPU. Default false."
        type: boolean
      upstream_connect_timeout:
        description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
        type: string
      upstream_tls_ca_path:
        description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'."
        type: string
      llm_routing_model:
        description: "Model name for the LLM router (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
        type: string
      agent_orchestration_model:
        description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
        type: string
      orchestrator_model_context_length:
        description: "Maximum token length for the orchestrator/routing model context window. Default is 8192."
        type: integer
  # Top-level system prompt; any string is accepted.
  system_prompt: {type: string}
  # Prompt targets: every entry must declare `name` and `description`, and
  # unknown keys are rejected.
  prompt_targets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        default:
          type: boolean
        description:
          type: string
        auto_llm_dispatch_on_response:
          type: boolean
        parameters:
          type: array
          items:
            type: object
            # NOTE: `required`, `default`, `type`, `enum` and `format` below
            # are field names of a parameter entry, not JSON Schema keywords.
            properties:
              name:
                type: string
              required:
                type: boolean
              default:
                anyOf:
                  - type: string
                  - type: integer
                  - type: boolean
              description:
                type: string
              type:
                type: string
              enum:
                type: array
                items:
                  anyOf:
                    - type: string
                    - type: integer
                    - type: boolean
              in_path:
                type: boolean
              format:
                type: string
            additionalProperties: false
            required:
              - name
              - description
              - type
        endpoint:
          type: object
          properties:
            name:
              type: string
            path:
              type: string
            http_method:
              type: string
              enum:
                - GET
                - POST
            http_headers:
              type: object
              additionalProperties:
                type: string
          additionalProperties: false
          required:
            - name
            - path
        system_prompt:
          type: string
      additionalProperties: false
      required:
        - name
        - description
  # Rate limits: every entry must carry `model`, `selector` and `limit`;
  # both nested objects are closed to unknown keys.
  ratelimits:
    type: array
    items:
      type: object
      additionalProperties: false
      required: [model, selector, limit]
      properties:
        model: {type: string}
        selector:
          type: object
          additionalProperties: false
          required: [key, value]
          properties:
            key: {type: string}
            value: {type: string}
        limit:
          type: object
          additionalProperties: false
          required: [tokens, unit]
          properties:
            tokens: {type: integer}
            unit: {type: string}
  # Tracing settings; all keys are optional and unknown keys are rejected.
  tracing:
    type: object
    additionalProperties: false
    properties:
      random_sampling: {type: integer}
      trace_arch_internal: {type: boolean}
      opentracing_grpc_endpoint: {type: string}
      span_attributes:
        type: object
        additionalProperties: false
        properties:
          header_prefixes:
            type: array
            items: {type: string}
          # Free-form map of string values.
          static:
            type: object
            additionalProperties: {type: string}
  # Mode selector; only the two values below are accepted.
  mode:
    type: string
    enum: [llm, prompt]
  # Routing settings, including the session-pinned routing cache.
  routing:
    type: object
    additionalProperties: false
    properties:
      llm_provider: {type: string}
      model: {type: string}
      session_ttl_seconds:
        description: TTL in seconds for session-pinned routing cache entries. Default 600 (10 minutes).
        type: integer
        minimum: 1
      session_max_entries:
        description: Maximum number of session-pinned routing cache entries. Default 10000.
        type: integer
        minimum: 1
        maximum: 10000
      session_cache:
        type: object
        additionalProperties: false
        properties:
          type:
            description: Session cache backend. "memory" (default) is in-process; "redis" is shared across replicas.
            type: string
            enum: [memory, redis]
            default: memory
          # NOTE(review): the description says `url` is required for redis,
          # but this schema does not enforce that.
          url:
            description: Redis URL, e.g. redis://localhost:6379. Required when type is redis.
            type: string
          tenant_header:
            type: string
            description: >
              Optional HTTP header name whose value is used as a tenant prefix in the cache key.
              When set, keys are scoped as plano:affinity:{tenant_id}:{session_id}.
  # State storage backend; `type` is always required.
  state_storage:
    type: object
    additionalProperties: false
    required: [type]
    properties:
      type:
        type: string
        enum: [memory, postgres]
      connection_string:
        description: Required when type is postgres. Supports environment variable substitution using $VAR or ${VAR} syntax.
        type: string
    # Conditional requirement: when `type` is `postgres`, `connection_string`
    # must also be present; for `memory` it may be omitted.
    allOf:
      - if:
          properties:
            type:
              const: postgres
        then:
          required: [connection_string]
  # Prompt guards: when `input_guards` is present it must contain
  # `jailbreak`, which in turn must contain `on_exception.message`.
  prompt_guards:
    type: object
    properties:
      input_guards:
        type: object
        additionalProperties: false
        required: [jailbreak]
        properties:
          jailbreak:
            type: object
            additionalProperties: false
            required: [on_exception]
            properties:
              on_exception:
                type: object
                additionalProperties: false
                required: [message]
                properties:
                  message: {type: string}
  # Top-level routing preferences: each entry must declare `name`,
  # `description` and a non-empty `models` list.
  routing_preferences:
    type: array
    items:
      type: object
      additionalProperties: false
      required: [name, description, models]
      properties:
        name: {type: string}
        description: {type: string}
        models:
          type: array
          minItems: 1
          items: {type: string}
        selection_policy:
          type: object
          additionalProperties: false
          required: [prefer]
          properties:
            prefer:
              type: string
              enum: [cheapest, fastest, none]

  # Metrics sources: each entry must match exactly one of the two shapes
  # below, distinguished by the constant `type`.
  model_metrics_sources:
    type: array
    items:
      oneOf:
        # `type: cost` entries.
        - type: object
          additionalProperties: false
          required: [type, provider]
          properties:
            type:
              type: string
              const: cost
            provider:
              type: string
              enum: [digitalocean]
            refresh_interval:
              description: "Refresh interval in seconds"
              type: integer
              minimum: 1
            model_aliases:
              description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
              type: object
              additionalProperties: {type: string}
        # `type: latency` entries; `url` and `query` are mandatory here.
        - type: object
          additionalProperties: false
          required: [type, provider, url, query]
          properties:
            type:
              type: string
              const: latency
            provider:
              type: string
              enum: [prometheus]
            url: {type: string}
            query: {type: string}
            refresh_interval:
              description: "Refresh interval in seconds"
              type: integer
              minimum: 1

# Unknown top-level keys are rejected; every config must declare `version`
# and `listeners`.
additionalProperties: false
required: [version, listeners]