# plano/docs/source/resources/includes/plano_config_full_reference.yaml

# Plano Gateway configuration version
version: v0.4.0

# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
agents:
  - id: weather_agent # Example agent for weather
    url: http://localhost:10510
  - id: flight_agent # Example agent for flights
    url: http://localhost:10520

# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
filters:
  - id: input_guards # Example filter for input validation
    url: http://localhost:10500
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: input_guards (default - same as filter id)

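  # A hedged equivalent with the defaults written out explicitly (same behavior
  # as the entry above, since these are the documented defaults):
  # - id: input_guards
  #   url: http://localhost:10500
  #   type: mcp
  #   transport: streamable-http
  #   tool: input_guards
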
# LLM provider configurations with API keys and model routing
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    default: true

  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY

  - model: anthropic/claude-sonnet-4-0
    access_key: $ANTHROPIC_API_KEY

  - model: mistral/ministral-3b-latest
    access_key: $MISTRAL_API_KEY

  - model: groq/llama-3.3-70b-versatile
    access_key: $GROQ_API_KEY

  # passthrough_auth: forwards the client's Authorization header upstream instead of
  # using the configured access_key. Useful for LiteLLM or similar proxy setups.
  - model: openai/gpt-4o-litellm
    base_url: https://litellm.example.com
    passthrough_auth: true

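  # A hedged sketch of what passthrough_auth changes (hypothetical token value):
  # a client call such as
  #   curl http://localhost:12000/v1/chat/completions \
  #     -H "Authorization: Bearer sk-user-token" \
  #     -d '{"model": "openai/gpt-4o-litellm", "messages": [...]}'
  # keeps its Authorization header when forwarded to https://litellm.example.com,
  # instead of having it replaced by a configured access_key.
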
  # Custom/self-hosted endpoint with explicit http_host override
  - model: openai/llama-3.3-70b
    base_url: https://api.custom-provider.com
    http_host: api.custom-provider.com
    access_key: $CUSTOM_API_KEY

# Model aliases - use friendly names instead of full provider model names
model_aliases:
  fast-llm:
    target: gpt-4o-mini
  smart-llm:
    target: gpt-4o

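  # A hedged usage sketch: with these aliases, a client request body such as
  #   {"model": "fast-llm", "messages": [...]}
  # is resolved to gpt-4o-mini before the request is routed upstream.
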
# routing_preferences: top-level list that tags named task categories with an
# ordered pool of candidate models. Plano's LLM router matches incoming requests
# against these descriptions and returns an ordered list of models; the client
# uses models[0] as primary and retries with models[1], models[2]... on 429/5xx.
# Requires overrides.llm_routing_model to point at Plano-Orchestrator (or equivalent).
# Each model in `models` must be declared in model_providers above.
# selection_policy is optional: {prefer: cheapest|fastest|none} lets the router
# reorder candidates using live cost/latency data from model_metrics_sources.
routing_preferences:
  - name: code generation
    description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
    models:
      - anthropic/claude-sonnet-4-0
      - openai/gpt-4o
      - groq/llama-3.3-70b-versatile
  - name: code review
    description: reviewing, analyzing, and suggesting improvements to existing code
    models:
      - anthropic/claude-sonnet-4-0
      - groq/llama-3.3-70b-versatile
    selection_policy:
      prefer: cheapest

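  # A hedged illustration of the flow described above: a request classified as
  # "code review" yields the ordered candidate list
  #   [anthropic/claude-sonnet-4-0, groq/llama-3.3-70b-versatile]
  # (possibly reordered by prefer: cheapest using live cost/latency data); the
  # client calls models[0] first and falls back to models[1] on a 429 or 5xx.
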
# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
listeners:
  # Agent listener for routing requests to multiple agents
  - type: agent
    name: travel_booking_service
    port: 8001
    router: plano_orchestrator_v1
    address: 0.0.0.0
    agents:
      - id: rag_agent
        description: virtual assistant for retrieval augmented generation tasks
    input_filters:
      - input_guards

  # Model listener for direct LLM access
  - type: model
    name: model_1
    address: 0.0.0.0
    port: 12000
    timeout: 30s # Request timeout (e.g. "30s", "60s")
    max_retries: 3 # Number of retries on upstream failure
    input_filters: # Filters applied before forwarding to the LLM
      - input_guards
    output_filters: # Filters applied to LLM responses before returning to the client
      - input_guards

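    # A hedged request sketch against this listener (hypothetical prompt):
    #   curl http://localhost:12000/v1/chat/completions \
    #     -d '{"model": "openai/gpt-4o", "messages": [{"role": "user", "content": "hi"}]}'
    # input_guards runs once before the request reaches the provider, and again
    # on the response before it is returned to the client.
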
  # Prompt listener for function calling (for prompt_targets)
  - type: prompt
    name: prompt_function_listener
    address: 0.0.0.0
    port: 10000

# Reusable service endpoints
endpoints:
  app_server:
    endpoint: 127.0.0.1:80
    connect_timeout: 0.005s
    protocol: http # http or https
  mistral_local:
    endpoint: 127.0.0.1:8001
  secure_service:
    endpoint: api.example.com:443
    protocol: https
    http_host: api.example.com # Override the Host header sent upstream

# Optional top-level system prompt applied to all prompt_targets
system_prompt: |
  You are a helpful assistant. Always respond concisely and accurately.

# Prompt targets for function calling and API orchestration
prompt_targets:
  - name: get_current_weather
    description: Get current weather at a location.
    parameters:
      - name: location
        description: The location to get the weather for
        required: true
        type: string
        format: City, State
      - name: days
        description: The number of days for the request
        required: true
        type: int
    endpoint:
      name: app_server
      path: /weather
      http_method: POST
    # Per-target system prompt (overrides top-level system_prompt for this target)
    system_prompt: You are a weather expert. Provide accurate and concise weather information.
    # auto_llm_dispatch_on_response: when true, the LLM is called again with the
    # function response to produce a final natural-language answer for the user
    auto_llm_dispatch_on_response: true

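    # A hedged sketch of the resulting dispatch (illustrative parameter values):
    # a matched request would POST the extracted parameters to app_server, e.g.
    #   POST http://127.0.0.1:80/weather
    #   {"location": "Seattle, WA", "days": 3}
    # and, because auto_llm_dispatch_on_response is true, the function response
    # is passed back to the LLM to compose the final natural-language answer.
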
# Rate limits - control token usage per model and request selector
ratelimits:
  - model: openai/gpt-4o
    selector:
      key: x-user-id # HTTP header key used to identify the rate-limit subject
      value: "*" # Wildcard matches any value; use a specific string to target one
    limit:
      tokens: 100000 # Maximum tokens allowed in the given time unit
      unit: hour # Time unit: "minute", "hour", or "day"
  - model: openai/gpt-4o-mini
    selector:
      key: x-org-id
      value: acme-corp
    limit:
      tokens: 500000
      unit: day

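  # A hedged reading of the second rule (hypothetical request): any request
  # carrying the header "x-org-id: acme-corp" draws from a shared budget of
  # 500000 tokens per day for openai/gpt-4o-mini; requests beyond that budget
  # are rate-limited until the day window resets.
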
# Global behavior overrides
overrides:
  # Threshold for routing a request to a prompt_target (0.0-1.0). Lower = more permissive.
  prompt_target_intent_matching_threshold: 0.7
  # Trim conversation history to fit within the model's context window
  optimize_context_window: true
  # Use Plano's agent orchestrator for multi-agent request routing
  use_agent_orchestrator: false
  # Connect timeout for upstream provider clusters (e.g., "5s", "10s"). Default: "5s"
  upstream_connect_timeout: 10s
  # Path to the trusted CA bundle for upstream TLS verification
  upstream_tls_ca_path: /etc/ssl/certs/ca-certificates.crt
  # Model used for intent-based LLM routing (must be listed in model_providers)
  llm_routing_model: Plano-Orchestrator
  # Model used for agent orchestration (must be listed in model_providers)
  agent_orchestration_model: Plano-Orchestrator
  # Disable agentic signal analysis (frustration, repetition, escalation, etc.)
  # on LLM responses to save CPU. Default: false.
  disable_signals: false
  # Model affinity: pin routing decisions for agentic loops
  routing:
    session_ttl_seconds: 600 # How long a pinned session lasts (default: 600s / 10 min)
    session_max_entries: 10000 # Max cached sessions before eviction (upper limit: 10000)
    # session_cache controls the backend used to store affinity state.
    # "memory" (default) is in-process and works for single-instance deployments.
    # "redis" shares state across replicas; required for multi-replica / Kubernetes setups.
    session_cache:
      type: memory # "memory" (default) or "redis"
      # url is required when type is "redis". Supports redis:// and rediss:// (TLS).
      # url: redis://localhost:6379
      # tenant_header: x-org-id # optional; when set, keys are scoped as plano:affinity:{tenant_id}:{session_id}

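    # A hedged multi-replica variant (hypothetical Redis endpoint), shown
    # commented out:
    # session_cache:
    #   type: redis
    #   url: rediss://redis.internal:6380
    #   tenant_header: x-org-id
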
  # State storage for multi-turn conversation history
  state_storage:
    type: memory # "memory" (in-process) or "postgres" (persistent)
    # connection_string is required when type is "postgres".
    # Supports environment variable substitution: $VAR or ${VAR}
    # connection_string: postgresql://user:$DB_PASS@localhost:5432/plano

  # Input guardrails applied globally to all incoming requests
  prompt_guards:
    input_guards:
      jailbreak:
        on_exception:
          message: "I'm sorry, I can't help with that request."

# OpenTelemetry tracing configuration
tracing:
  # Random sampling percentage (1-100)
  random_sampling: 100
  # Include internal Plano spans in traces
  trace_arch_internal: false
  # gRPC endpoint for OpenTelemetry collector (e.g., Jaeger, Tempo)
  opentracing_grpc_endpoint: http://localhost:4317
  span_attributes:
    # Propagate request headers whose names start with these prefixes as span attributes
    header_prefixes:
      - x-user-
      - x-org-
    # Static key/value pairs added to every span
    static:
      environment: production
      service.team: platform
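  # A hedged illustration (hypothetical request): with this configuration, a
  # request carrying the header "x-user-id: 42" yields spans tagged with
  # x-user-id, plus environment: production and service.team: platform on
  # every span.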