# Mirror of https://github.com/katanemo/plano.git (synced 2026-06-20 15:28:07 +02:00).
# Commit: Add JSON schema definitions for retry policy configuration including RetryPolicy, BackoffConfig, RetryAfterConfig, HighLatencyConfig, LatencyTriggerConfig, RetryStrategy, StatusCodeEntry, and all associated enums.
# Signed-off-by: Troy Mitchell <i@troy-y.org>
# 977 lines, 31 KiB, YAML
# JSON Schema (draft-07) document. The value being validated must be a mapping.
$schema: 'http://json-schema.org/draft-07/schema#'
type: object
# The accepted top-level keys are declared below, one schema per key.
properties:
# Schema version tag: a string restricted to the literals listed here.
# Values are quoted so no parser can ever read them as numbers.
version:
  type: string
  enum:
    - 'v0.1'
    - 'v0.1.0'
    - '0.1-beta'
    - '0.2.0'
    - 'v0.3.0'
    - 'v0.4.0'
# List of agent entries. Each entry carries exactly an `id` and a `url`
# (both strings, both mandatory); any other key is rejected.
agents:
  type: array
  items:
    type: object
    required: [id, url]
    additionalProperties: false
    properties:
      id:
        type: string
      url:
        type: string
# List of filter entries. `id` and `url` are mandatory; `type`, `transport`
# and `tool` are optional. Keys outside this set are rejected.
filters:
  type: array
  items:
    type: object
    required: [id, url]
    additionalProperties: false
    properties:
      id:
        type: string
      url:
        type: string
      type:
        type: string
        enum: [mcp, http]
      transport:
        type: string
        enum: [streamable-http]
      tool:
        type: string
# Listener configuration. Exactly one of two shapes must match:
#   1. a list of listener objects (current format), or
#   2. a single object with ingress_traffic / egress_traffic (legacy format).
listeners:
  oneOf:
    # List format. The former `additionalProperties: false` on this branch was
    # dropped: that keyword only constrains objects, so it never applied to
    # an array and only suggested a restriction that did not exist.
    - type: array
      items:
        type: object
        properties:
          name:
            type: string
          port:
            type: integer
          address:
            type: string
          timeout:
            type: string
          router:
            type: string
            enum:
              - plano_orchestrator_v1
          max_retries:
            type: integer
          type:
            type: string
            enum:
              - model
              - prompt
              - agent
          # Agents attached to this listener; each needs an id and a description.
          agents:
            type: array
            items:
              type: object
              properties:
                id:
                  type: string
                description:
                  type: string
                default:
                  type: boolean
                input_filters:
                  type: array
                  items:
                    type: string
              additionalProperties: false
              required:
                - id
                - description
          input_filters:
            type: array
            items:
              type: string
          output_filters:
            type: array
            items:
              type: string
        additionalProperties: false
        required:
          - type
          - name
    - type: object  # deprecated legacy format, use list format instead
      additionalProperties: false
      properties:
        ingress_traffic:
          type: object
          properties:
            address:
              type: string
            port:
              type: integer
            message_format:
              type: string
              enum:
                - openai
            timeout:
              type: string
          additionalProperties: false
        egress_traffic:
          type: object
          properties:
            address:
              type: string
            port:
              type: integer
            message_format:
              type: string
              enum:
                - openai
            timeout:
              type: string
          additionalProperties: false
# Named endpoints. Keys matching the identifier pattern below (a letter
# followed by letters, digits or underscores) are validated as endpoint
# objects; `endpoint` is the only mandatory field of each.
endpoints:
  type: object
  patternProperties:
    '^[a-zA-Z][a-zA-Z0-9_]*$':
      type: object
      required: [endpoint]
      additionalProperties: false
      properties:
        endpoint:
          type: string
          pattern: '^.*$'
        connect_timeout:
          type: string
        protocol:
          type: string
          enum: [http, https]
        http_host:
          type: string
# List of model provider entries. Only `model` is mandatory; keys outside the
# declared set are rejected at every object level that sets
# `additionalProperties: false`.
model_providers:
  type: array
  items:
    type: object
    properties:
      name:
        type: string
      access_key:
        type: string
      model:
        type: string
      default:
        type: boolean
      base_url:
        type: string
      passthrough_auth:
        type: boolean
        description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
      http_host:
        type: string
      provider_interface:
        type: string
        enum:
          - plano
          - claude
          - deepseek
          - groq
          - mistral
          - openai
          - xiaomi
          - gemini
          - chatgpt
          - digitalocean
          - vercel
          - openrouter
      headers:
        type: object
        additionalProperties:
          type: string
        description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
      routing_preferences:
        type: array
        description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
        items:
          type: object
          properties:
            name:
              type: string
            description:
              type: string
          additionalProperties: false
          required:
            - name
            - description
      retry_policy:
        type: object
        description: "Retry policy configuration. When not specified, no retry logic is enabled."
        properties:
          fallback_models:
            type: array
            description: "Ordered list of model identifiers to fallback to before using Provider_List."
            items:
              type: string
          default_strategy:
            type: string
            description: "Default retry strategy for unconfigured status codes. Default: different_provider."
            enum:
              - same_model
              - same_provider
              - different_provider
          default_max_attempts:
            type: integer
            description: "Default max retry attempts for unconfigured status codes. Default: 2."
            minimum: 0
          on_status_codes:
            type: array
            description: "Per-status-code retry configuration."
            items:
              type: object
              properties:
                codes:
                  type: array
                  description: "List of status codes as integers or range strings (e.g. '502-504')."
                  items:
                    anyOf:
                      - type: integer
                        minimum: 100
                        maximum: 599
                      - type: string
                        description: "Range string in 'start-end' format (e.g. '502-504')."
                strategy:
                  type: string
                  description: "Retry strategy for these status codes."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts for these status codes."
                  minimum: 0
              additionalProperties: false
              required:
                - codes
                - strategy
                - max_attempts
          on_timeout:
            type: object
            description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
            properties:
              strategy:
                type: string
                description: "Retry strategy for timeout errors."
                enum:
                  - same_model
                  - same_provider
                  - different_provider
              max_attempts:
                type: integer
                description: "Max retry attempts for timeout errors."
                minimum: 1
            additionalProperties: false
            required:
              - strategy
              - max_attempts
          on_high_latency:
            type: object
            description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
            properties:
              threshold_ms:
                type: integer
                description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
                minimum: 1
              measure:
                type: string
                description: "What latency metric to measure. Default: ttfb."
                enum:
                  - ttfb
                  - total
              strategy:
                type: string
                description: "Retry strategy when latency threshold is exceeded."
                enum:
                  - same_model
                  - same_provider
                  - different_provider
              max_attempts:
                type: integer
                description: "Max retry attempts when latency threshold is exceeded."
                minimum: 1
              block_duration_seconds:
                type: integer
                description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
                minimum: 1
              scope:
                type: string
                description: "What to block: model-level or provider-level. Default: model."
                enum:
                  - model
                  - provider
              apply_to:
                type: string
                description: "Blocking scope: global or request-scoped. Default: global."
                enum:
                  - global
                  - request
              min_triggers:
                type: integer
                description: "Number of High_Latency_Events required before creating a block. Default: 1."
                minimum: 1
              trigger_window_seconds:
                type: integer
                description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
                minimum: 1
            additionalProperties: false
            required:
              - threshold_ms
              - strategy
              - max_attempts
              - block_duration_seconds
            # Enforce the rule stated in the trigger_window_seconds description:
            # the window is mandatory once min_triggers is explicitly set above 1.
            # `required: [min_triggers]` keeps the condition from matching when
            # min_triggers is omitted (documented default: 1).
            if:
              properties:
                min_triggers:
                  minimum: 2
              required:
                - min_triggers
            then:
              required:
                - trigger_window_seconds
          backoff:
            type: object
            description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
            properties:
              apply_to:
                type: string
                description: "REQUIRED. Determines when backoff delays are applied."
                enum:
                  - same_model
                  - same_provider
                  - global
              base_ms:
                type: integer
                description: "Base delay in milliseconds for exponential backoff. Default: 100."
                minimum: 1
              max_ms:
                type: integer
                description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
                minimum: 1
              jitter:
                type: boolean
                description: "Add random jitter to prevent thundering herd. Default: true."
            additionalProperties: false
            required:
              - apply_to
          retry_after_handling:
            type: object
            description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
            properties:
              scope:
                type: string
                description: "What to block: model-level or provider-level. Default: model."
                enum:
                  - model
                  - provider
              apply_to:
                type: string
                description: "Blocking scope: request-scoped or global. Default: global."
                enum:
                  - request
                  - global
              max_retry_after_seconds:
                type: integer
                description: "Maximum Retry-After value honored in seconds. Default: 300."
                minimum: 1
            additionalProperties: false
          max_retry_duration_ms:
            type: integer
            description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
            minimum: 0
        additionalProperties: false
    additionalProperties: false
    required:
      - model
# Legacy spelling of the provider list; entries have the same shape as the
# ones declared under model_providers. Only `model` is mandatory.
llm_providers:  # deprecated for legacy support, use model_providers instead
  type: array
  items:
    type: object
    properties:
      name:
        type: string
      access_key:
        type: string
      model:
        type: string
      default:
        type: boolean
      base_url:
        type: string
      passthrough_auth:
        type: boolean
        description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
      http_host:
        type: string
      provider_interface:
        type: string
        enum:
          - plano
          - claude
          - deepseek
          - groq
          - mistral
          - openai
          - xiaomi
          - gemini
          - chatgpt
          - digitalocean
          - vercel
          - openrouter
      headers:
        type: object
        additionalProperties:
          type: string
        description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
      routing_preferences:
        type: array
        description: "[DEPRECATED] Inline routing_preferences under an llm_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
        items:
          type: object
          properties:
            name:
              type: string
            description:
              type: string
          additionalProperties: false
          required:
            - name
            - description
      retry_policy:
        type: object
        description: "Retry policy configuration. When not specified, no retry logic is enabled."
        properties:
          fallback_models:
            type: array
            description: "Ordered list of model identifiers to fallback to before using Provider_List."
            items:
              type: string
          default_strategy:
            type: string
            description: "Default retry strategy for unconfigured status codes. Default: different_provider."
            enum:
              - same_model
              - same_provider
              - different_provider
          default_max_attempts:
            type: integer
            description: "Default max retry attempts for unconfigured status codes. Default: 2."
            minimum: 0
          on_status_codes:
            type: array
            description: "Per-status-code retry configuration."
            items:
              type: object
              properties:
                codes:
                  type: array
                  description: "List of status codes as integers or range strings (e.g. '502-504')."
                  items:
                    anyOf:
                      - type: integer
                        minimum: 100
                        maximum: 599
                      - type: string
                        description: "Range string in 'start-end' format (e.g. '502-504')."
                strategy:
                  type: string
                  description: "Retry strategy for these status codes."
                  enum:
                    - same_model
                    - same_provider
                    - different_provider
                max_attempts:
                  type: integer
                  description: "Max retry attempts for these status codes."
                  minimum: 0
              additionalProperties: false
              required:
                - codes
                - strategy
                - max_attempts
          on_timeout:
            type: object
            description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
            properties:
              strategy:
                type: string
                description: "Retry strategy for timeout errors."
                enum:
                  - same_model
                  - same_provider
                  - different_provider
              max_attempts:
                type: integer
                description: "Max retry attempts for timeout errors."
                minimum: 1
            additionalProperties: false
            required:
              - strategy
              - max_attempts
          on_high_latency:
            type: object
            description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
            properties:
              threshold_ms:
                type: integer
                description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
                minimum: 1
              measure:
                type: string
                description: "What latency metric to measure. Default: ttfb."
                enum:
                  - ttfb
                  - total
              strategy:
                type: string
                description: "Retry strategy when latency threshold is exceeded."
                enum:
                  - same_model
                  - same_provider
                  - different_provider
              max_attempts:
                type: integer
                description: "Max retry attempts when latency threshold is exceeded."
                minimum: 1
              block_duration_seconds:
                type: integer
                description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
                minimum: 1
              scope:
                type: string
                description: "What to block: model-level or provider-level. Default: model."
                enum:
                  - model
                  - provider
              apply_to:
                type: string
                description: "Blocking scope: global or request-scoped. Default: global."
                enum:
                  - global
                  - request
              min_triggers:
                type: integer
                description: "Number of High_Latency_Events required before creating a block. Default: 1."
                minimum: 1
              trigger_window_seconds:
                type: integer
                description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
                minimum: 1
            additionalProperties: false
            required:
              - threshold_ms
              - strategy
              - max_attempts
              - block_duration_seconds
            # Enforce the rule stated in the trigger_window_seconds description:
            # the window is mandatory once min_triggers is explicitly set above 1.
            # `required: [min_triggers]` keeps the condition from matching when
            # min_triggers is omitted (documented default: 1).
            if:
              properties:
                min_triggers:
                  minimum: 2
              required:
                - min_triggers
            then:
              required:
                - trigger_window_seconds
          backoff:
            type: object
            description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
            properties:
              apply_to:
                type: string
                description: "REQUIRED. Determines when backoff delays are applied."
                enum:
                  - same_model
                  - same_provider
                  - global
              base_ms:
                type: integer
                description: "Base delay in milliseconds for exponential backoff. Default: 100."
                minimum: 1
              max_ms:
                type: integer
                description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
                minimum: 1
              jitter:
                type: boolean
                description: "Add random jitter to prevent thundering herd. Default: true."
            additionalProperties: false
            required:
              - apply_to
          retry_after_handling:
            type: object
            description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
            properties:
              scope:
                type: string
                description: "What to block: model-level or provider-level. Default: model."
                enum:
                  - model
                  - provider
              apply_to:
                type: string
                description: "Blocking scope: request-scoped or global. Default: global."
                enum:
                  - request
                  - global
              max_retry_after_seconds:
                type: integer
                description: "Maximum Retry-After value honored in seconds. Default: 300."
                minimum: 1
            additionalProperties: false
          max_retry_duration_ms:
            type: integer
            description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
            minimum: 0
        additionalProperties: false
    additionalProperties: false
    required:
      - model
# Alias table. Every key matched by the catch-all pattern must map to an
# object holding a single mandatory string, `target`.
model_aliases:
  type: object
  patternProperties:
    '^.*$':
      type: object
      required: [target]
      additionalProperties: false
      properties:
        target:
          type: string
# Optional tuning knobs. No key is mandatory, and this object does not set
# additionalProperties, so keys not listed here are not rejected.
overrides:
  type: object
  properties:
    # Plain typed switches and thresholds (no descriptions in the schema).
    prompt_target_intent_matching_threshold:
      type: number
    optimize_context_window:
      type: boolean
    use_agent_orchestrator:
      type: boolean

    # Described options.
    disable_signals:
      type: boolean
      description: "Disable agentic signal analysis (frustration, repetition, escalation, etc.) on LLM responses to save CPU. Default false."
    upstream_connect_timeout:
      type: string
      description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
    upstream_tls_ca_path:
      type: string
      description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'."
    llm_routing_model:
      type: string
      description: "Model name for the LLM router (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
    agent_orchestration_model:
      type: string
      description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
    orchestrator_model_context_length:
      type: integer
      description: "Maximum token length for the orchestrator/routing model context window. Default is 8192."
# Free-form string; the schema places no further constraint on it.
system_prompt:
  type: string
# List of prompt targets. Each target needs a `name` and a `description`;
# `parameters`, `endpoint` and the remaining keys are optional.
prompt_targets:
  type: array
  items:
    type: object
    properties:
      name:
        type: string
      default:
        type: boolean
      description:
        type: string
      auto_llm_dispatch_on_response:
        type: boolean
      # Parameter descriptors. Note that `required`, `default`, `type` and
      # `enum` below are *property names* of a parameter, not schema keywords.
      parameters:
        type: array
        items:
          type: object
          properties:
            # A stray `additionalProperties: false` that followed this entry
            # was removed: next to a string schema it constrains nothing, and
            # unknown parameter keys are already rejected by the item-level
            # `additionalProperties: false` further down.
            name:
              type: string
            required:
              type: boolean
            default:
              anyOf:
                - type: string
                - type: integer
                - type: boolean
            description:
              type: string
            type:
              type: string
            enum:
              type: array
              items:
                anyOf:
                  - type: string
                  - type: integer
                  - type: boolean
            in_path:
              type: boolean
            format:
              type: string
          additionalProperties: false
          required:
            - name
            - description
            - type
      endpoint:
        type: object
        properties:
          name:
            type: string
          path:
            type: string
          http_method:
            type: string
            enum:
              - GET
              - POST
          http_headers:
            type: object
            additionalProperties:
              type: string
        additionalProperties: false
        required:
          - name
          - path
      system_prompt:
        type: string
    additionalProperties: false
    required:
      - name
      - description
# List of rate-limit rules. Every rule must provide `model`, `selector`
# (key/value pair) and `limit` (token count plus unit); nothing else is allowed.
ratelimits:
  type: array
  items:
    type: object
    required: [model, selector, limit]
    additionalProperties: false
    properties:
      model:
        type: string
      selector:
        type: object
        required: [key, value]
        additionalProperties: false
        properties:
          key:
            type: string
          value:
            type: string
      limit:
        type: object
        required: [tokens, unit]
        additionalProperties: false
        properties:
          tokens:
            type: integer
          unit:
            type: string
# Tracing options. All keys are optional; unknown keys are rejected both here
# and inside `span_attributes`.
tracing:
  type: object
  additionalProperties: false
  properties:
    random_sampling:
      type: integer
    trace_arch_internal:
      type: boolean
    opentracing_grpc_endpoint:
      type: string
    span_attributes:
      type: object
      additionalProperties: false
      properties:
        header_prefixes:
          type: array
          items:
            type: string
        # Arbitrary string-valued entries.
        static:
          type: object
          additionalProperties:
            type: string
# Operating mode: one of the two string literals below.
mode:
  type: string
  enum: [llm, prompt]
# Routing options, including the session-pinned routing cache settings.
# All keys are optional; unknown keys are rejected.
routing:
  type: object
  properties:
    llm_provider:
      type: string
    model:
      type: string
    session_ttl_seconds:
      type: integer
      minimum: 1
      description: TTL in seconds for session-pinned routing cache entries. Default 600 (10 minutes).
    session_max_entries:
      type: integer
      minimum: 1
      maximum: 10000
      description: Maximum number of session-pinned routing cache entries. Default 10000.
    session_cache:
      type: object
      properties:
        type:
          type: string
          enum:
            - memory
            - redis
          default: memory
          description: Session cache backend. "memory" (default) is in-process; "redis" is shared across replicas.
        url:
          type: string
          description: Redis URL, e.g. redis://localhost:6379. Required when type is redis.
        tenant_header:
          type: string
          description: >
            Optional HTTP header name whose value is used as a tenant prefix in the cache key.
            When set, keys are scoped as plano:affinity:{tenant_id}:{session_id}.
      additionalProperties: false
      # Enforce the rule stated in the url description: a redis backend needs a url.
      # `required: [type]` inside `if` keeps the condition from matching when
      # type is omitted (documented default: memory).
      allOf:
        - if:
            properties:
              type:
                const: redis
            required:
              - type
          then:
            required:
              - url
  additionalProperties: false
# State storage backend. `type` is always mandatory. `connection_string`
# becomes mandatory only for the postgres backend (see the conditional
# at the end); the memory backend does not need one.
state_storage:
  type: object
  required: [type]
  additionalProperties: false
  properties:
    type:
      type: string
      enum: [memory, postgres]
    connection_string:
      type: string
      description: Required when type is postgres. Supports environment variable substitution using $VAR or ${VAR} syntax.
  # Conditional requirement: type == postgres  =>  connection_string required.
  allOf:
    - if:
        properties:
          type:
            const: postgres
      then:
        required: [connection_string]
# Prompt guard settings. When `input_guards` is present it must contain
# `jailbreak`, which must contain `on_exception`, which must contain a
# string `message`. Unknown keys are rejected at each of those three levels.
prompt_guards:
  type: object
  properties:
    input_guards:
      type: object
      required: [jailbreak]
      additionalProperties: false
      properties:
        jailbreak:
          type: object
          required: [on_exception]
          additionalProperties: false
          properties:
            on_exception:
              type: object
              required: [message]
              additionalProperties: false
              properties:
                message:
                  type: string
# Top-level routing preferences. Each entry names a preference, describes it,
# and lists at least one model; `selection_policy` is optional.
routing_preferences:
  type: array
  items:
    type: object
    required: [name, description, models]
    additionalProperties: false
    properties:
      name:
        type: string
      description:
        type: string
      models:
        type: array
        minItems: 1
        items:
          type: string
      selection_policy:
        type: object
        required: [prefer]
        additionalProperties: false
        properties:
          prefer:
            type: string
            # Quoted so that 'none' can never be read as anything but a string.
            enum: ['cheapest', 'fastest', 'none']
# Metrics sources. Each entry must match exactly one of two variants,
# distinguished by the constant value of `type`:
#   - cost    (provider: digitalocean)
#   - latency (provider: prometheus, plus mandatory url and query)
model_metrics_sources:
  type: array
  items:
    oneOf:
      # Variant: cost
      - type: object
        required: [type, provider]
        additionalProperties: false
        properties:
          type:
            type: string
            const: cost
          provider:
            type: string
            enum: [digitalocean]
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
          model_aliases:
            type: object
            description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
            additionalProperties:
              type: string
      # Variant: latency
      - type: object
        required: [type, provider, url, query]
        additionalProperties: false
        properties:
          type:
            type: string
            const: latency
          provider:
            type: string
            enum: [prometheus]
          url:
            type: string
          query:
            type: string
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
# Root-level constraints: every document must carry `version` and `listeners`,
# and top-level keys not declared in the schema are rejected.
required: [version, listeners]
additionalProperties: false