Rename all arch references to plano (#745)
* Rename all arch references to plano across the codebase
Complete rebrand from "Arch"/"archgw" to "Plano" including:
- Config files: arch_config_schema.yaml, workflow, demo configs
- Environment variables: ARCH_CONFIG_* → PLANO_CONFIG_*
- Python CLI: variables, functions, file paths, docker mounts
- Rust crates: config paths, log messages, metadata keys
- Docker/build: Dockerfile, supervisord, .dockerignore, .gitignore
- Docker Compose: volume mounts and env vars across all demos/tests
- GitHub workflows: job/step names
- Shell scripts: log messages
- Demos: Python code, READMEs, VS Code configs, Grafana dashboard
- Docs: RST includes, code comments, config references
- Package metadata: package.json, pyproject.toml, uv.lock
External URLs (docs.archgw.com, github.com/katanemo/archgw) left as-is.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Update remaining arch references in docs
- Rename RST cross-reference labels: arch_access_logging, arch_overview_tracing, arch_overview_threading → plano_*
- Update label references in request_lifecycle.rst
- Rename arch_config_state_storage_example.yaml → plano_config_state_storage_example.yaml
- Update config YAML comments: "Arch creates/uses" → "Plano creates/uses"
- Update "the Arch gateway" → "the Plano gateway" in configuration_reference.rst
- Update arch_config_schema.yaml reference in provider_models.py
- Rename arch_agent_router → plano_agent_router in config example
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Fix remaining arch references found in second pass
- config/docker-compose.dev.yaml: ARCH_CONFIG_FILE → PLANO_CONFIG_FILE,
arch_config.yaml → plano_config.yaml, archgw_logs → plano_logs
- config/test_passthrough.yaml: container mount path
- tests/e2e/docker-compose.yaml: source file path (was still arch_config.yaml)
- cli/planoai/core.py: comment and log message
- crates/brightstaff/src/tracing/constants.rs: doc comment
- tests/{e2e,archgw}/common.py: get_arch_messages → get_plano_messages,
arch_state/arch_messages variables renamed
- tests/{e2e,archgw}/test_prompt_gateway.py: updated imports and usages
- demos/shared/test_runner/{common,test_demos}.py: same renames
- tests/e2e/test_model_alias_routing.py: docstring
- .dockerignore: archgw_modelserver → plano_modelserver
- demos/use_cases/claude_code_router/pretty_model_resolution.sh: container name
Note: x-arch-* HTTP header values and Rust constant names intentionally
preserved for backwards compatibility with existing deployments.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 15:16:56 -08:00
|
|
|
|
# Plano Gateway configuration version
|
2025-12-23 17:14:50 -08:00
|
|
|
|
version: v0.3.0
|
2024-09-20 17:08:42 -07:00
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
|
|
|
|
|
|
agents:
|
2026-01-15 00:06:28 +01:00
|
|
|
|
- id: weather_agent # Example agent for weather
|
2026-03-05 07:35:25 -08:00
|
|
|
|
url: http://localhost:10510
|
2024-09-30 17:49:05 -07:00
|
|
|
|
|
2026-01-15 00:06:28 +01:00
|
|
|
|
- id: flight_agent # Example agent for flights
|
2026-03-05 07:35:25 -08:00
|
|
|
|
url: http://localhost:10520
|
2024-09-30 17:49:05 -07:00
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
|
|
|
|
|
|
filters:
|
2026-01-15 00:06:28 +01:00
|
|
|
|
- id: input_guards # Example filter for input validation
|
2026-03-05 07:35:25 -08:00
|
|
|
|
url: http://localhost:10500
|
2025-12-23 17:14:50 -08:00
|
|
|
|
# type: mcp (default)
|
|
|
|
|
|
# transport: streamable-http (default)
|
|
|
|
|
|
# tool: input_guards (default - same as filter id)
|
|
|
|
|
|
|
|
|
|
|
|
# LLM provider configurations with API keys and model routing
|
|
|
|
|
|
model_providers:
|
|
|
|
|
|
- model: openai/gpt-4o
|
2024-10-30 17:54:51 -07:00
|
|
|
|
access_key: $OPENAI_API_KEY
|
2024-09-20 17:08:42 -07:00
|
|
|
|
default: true
|
2024-09-30 17:49:05 -07:00
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
- model: openai/gpt-4o-mini
|
|
|
|
|
|
access_key: $OPENAI_API_KEY
|
|
|
|
|
|
|
|
|
|
|
|
- model: anthropic/claude-sonnet-4-0
|
|
|
|
|
|
access_key: $ANTHROPIC_API_KEY
|
2024-09-30 17:49:05 -07:00
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
- model: mistral/ministral-3b-latest
|
|
|
|
|
|
access_key: $MISTRAL_API_KEY
|
2024-09-20 17:08:42 -07:00
|
|
|
|
|
2026-03-30 15:25:05 -04:00
|
|
|
|
# routing_preferences: tags a model with named capabilities so Plano's LLM router
|
|
|
|
|
|
# can select the best model for each request based on intent. Requires the
|
|
|
|
|
|
# Arch-Router model (or equivalent) to be configured in overrides.llm_routing_model.
|
|
|
|
|
|
# Each preference has a name (short label) and a description (used for intent matching).
|
2026-03-30 13:46:52 -07:00
|
|
|
|
- model: groq/llama-3.3-70b-versatile
|
|
|
|
|
|
access_key: $GROQ_API_KEY
|
2026-03-30 15:25:05 -04:00
|
|
|
|
routing_preferences:
|
|
|
|
|
|
- name: code generation
|
|
|
|
|
|
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
|
|
|
|
|
- name: code review
|
|
|
|
|
|
description: reviewing, analyzing, and suggesting improvements to existing code
|
|
|
|
|
|
|
|
|
|
|
|
# passthrough_auth: forwards the client's Authorization header upstream instead of
|
|
|
|
|
|
# using the configured access_key. Useful for LiteLLM or similar proxy setups.
|
2026-01-15 00:06:28 +01:00
|
|
|
|
- model: openai/gpt-4o-litellm
|
|
|
|
|
|
base_url: https://litellm.example.com
|
|
|
|
|
|
passthrough_auth: true
|
2025-12-23 17:14:50 -08:00
|
|
|
|
|
2026-03-30 15:25:05 -04:00
|
|
|
|
# Custom/self-hosted endpoint with explicit http_host override
|
|
|
|
|
|
- model: openai/llama-3.3-70b
|
|
|
|
|
|
base_url: https://api.custom-provider.com
|
|
|
|
|
|
http_host: api.custom-provider.com
|
|
|
|
|
|
access_key: $CUSTOM_API_KEY
|
|
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
# Model aliases - use friendly names instead of full provider model names
|
2025-09-18 18:36:30 -07:00
|
|
|
|
model_aliases:
|
2025-12-23 17:14:50 -08:00
|
|
|
|
fast-llm:
|
|
|
|
|
|
target: gpt-4o-mini
|
|
|
|
|
|
|
|
|
|
|
|
smart-llm:
|
2025-09-18 18:36:30 -07:00
|
|
|
|
target: gpt-4o
|
|
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
|
|
|
|
|
|
listeners:
|
|
|
|
|
|
# Agent listener for routing requests to multiple agents
|
|
|
|
|
|
- type: agent
|
|
|
|
|
|
name: travel_booking_service
|
|
|
|
|
|
port: 8001
|
|
|
|
|
|
router: plano_orchestrator_v1
|
|
|
|
|
|
address: 0.0.0.0
|
|
|
|
|
|
agents:
|
|
|
|
|
|
- id: rag_agent
|
|
|
|
|
|
description: virtual assistant for retrieval augmented generation tasks
|
2026-03-18 17:58:20 -07:00
|
|
|
|
input_filters:
|
2025-12-23 17:14:50 -08:00
|
|
|
|
- input_guards
|
|
|
|
|
|
|
|
|
|
|
|
# Model listener for direct LLM access
|
|
|
|
|
|
- type: model
|
|
|
|
|
|
name: model_1
|
|
|
|
|
|
address: 0.0.0.0
|
|
|
|
|
|
port: 12000
|
2026-03-30 15:25:05 -04:00
|
|
|
|
timeout: 30s # Request timeout (e.g. "30s", "60s")
|
|
|
|
|
|
max_retries: 3 # Number of retries on upstream failure
|
|
|
|
|
|
input_filters: # Filters applied before forwarding to the LLM
|
|
|
|
|
|
- input_guards
|
|
|
|
|
|
output_filters: # Filters applied to LLM responses before returning to the client
|
|
|
|
|
|
- input_guards
|
2025-12-23 17:14:50 -08:00
|
|
|
|
|
|
|
|
|
|
# Prompt listener for function calling (for prompt_targets)
|
|
|
|
|
|
- type: prompt
|
|
|
|
|
|
name: prompt_function_listener
|
|
|
|
|
|
address: 0.0.0.0
|
|
|
|
|
|
port: 10000
|
2024-09-30 17:49:05 -07:00
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
# Reusable service endpoints
|
|
|
|
|
|
endpoints:
|
|
|
|
|
|
app_server:
|
|
|
|
|
|
endpoint: 127.0.0.1:80
|
|
|
|
|
|
connect_timeout: 0.005s # 5 ms
|
2026-03-30 15:25:05 -04:00
|
|
|
|
protocol: http # http or https
|
2024-09-20 17:08:42 -07:00
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
mistral_local:
|
|
|
|
|
|
endpoint: 127.0.0.1:8001
|
|
|
|
|
|
|
2026-03-30 15:25:05 -04:00
|
|
|
|
secure_service:
|
|
|
|
|
|
endpoint: api.example.com:443
|
|
|
|
|
|
protocol: https
|
|
|
|
|
|
http_host: api.example.com # Override the Host header sent upstream
|
|
|
|
|
|
|
|
|
|
|
|
# Optional top-level system prompt applied to all prompt_targets
|
|
|
|
|
|
system_prompt: |
|
|
|
|
|
|
You are a helpful assistant. Always respond concisely and accurately.
|
|
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
# Prompt targets for function calling and API orchestration
|
2024-09-20 17:08:42 -07:00
|
|
|
|
prompt_targets:
|
2025-12-23 17:14:50 -08:00
|
|
|
|
- name: get_current_weather
|
|
|
|
|
|
description: Get current weather at a location.
|
|
|
|
|
|
parameters:
|
|
|
|
|
|
- name: location
|
|
|
|
|
|
description: The location to get the weather for
|
|
|
|
|
|
required: true
|
|
|
|
|
|
type: string
|
|
|
|
|
|
format: City, State
|
|
|
|
|
|
- name: days
|
|
|
|
|
|
description: the number of days for the request
|
|
|
|
|
|
required: true
|
|
|
|
|
|
type: int
|
2024-09-30 17:49:05 -07:00
|
|
|
|
endpoint:
|
|
|
|
|
|
name: app_server
|
2025-12-23 17:14:50 -08:00
|
|
|
|
path: /weather
|
2024-12-06 14:37:33 -08:00
|
|
|
|
http_method: POST
|
2026-03-30 15:25:05 -04:00
|
|
|
|
# Per-target system prompt (overrides top-level system_prompt for this target)
|
|
|
|
|
|
system_prompt: You are a weather expert. Provide accurate and concise weather information.
|
|
|
|
|
|
# auto_llm_dispatch_on_response: when true, the LLM is called again with the
|
|
|
|
|
|
# function response to produce a final natural-language answer for the user
|
|
|
|
|
|
auto_llm_dispatch_on_response: true
|
|
|
|
|
|
|
|
|
|
|
|
# Rate limits - control token usage per model and request selector
|
|
|
|
|
|
ratelimits:
|
|
|
|
|
|
- model: openai/gpt-4o
|
|
|
|
|
|
selector:
|
|
|
|
|
|
key: x-user-id # HTTP header key used to identify the rate-limit subject
|
|
|
|
|
|
value: "*" # Wildcard matches any value; use a specific string to target a single value
|
|
|
|
|
|
limit:
|
|
|
|
|
|
tokens: 100000 # Maximum tokens allowed in the given time unit
|
|
|
|
|
|
unit: hour # Time unit: "minute", "hour", or "day"
|
|
|
|
|
|
|
|
|
|
|
|
- model: openai/gpt-4o-mini
|
|
|
|
|
|
selector:
|
|
|
|
|
|
key: x-org-id
|
|
|
|
|
|
value: acme-corp
|
|
|
|
|
|
limit:
|
|
|
|
|
|
tokens: 500000
|
|
|
|
|
|
unit: day
|
|
|
|
|
|
|
|
|
|
|
|
# Global behavior overrides
|
|
|
|
|
|
overrides:
|
|
|
|
|
|
# Threshold for routing a request to a prompt_target (0.0–1.0). Lower = more permissive.
|
|
|
|
|
|
prompt_target_intent_matching_threshold: 0.7
|
|
|
|
|
|
# Trim conversation history to fit within the model's context window
|
|
|
|
|
|
optimize_context_window: true
|
|
|
|
|
|
# Use Plano's agent orchestrator for multi-agent request routing
|
2026-03-30 13:46:52 -07:00
|
|
|
|
use_agent_orchestrator: false
|
2026-03-30 15:25:05 -04:00
|
|
|
|
# Connect timeout for upstream provider clusters (e.g., "5s", "10s"). Default: "5s"
|
|
|
|
|
|
upstream_connect_timeout: 10s
|
|
|
|
|
|
# Path to the trusted CA bundle for upstream TLS verification
|
|
|
|
|
|
upstream_tls_ca_path: /etc/ssl/certs/ca-certificates.crt
|
|
|
|
|
|
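# Model used for intent-based LLM routing (must be listed in model_providers)
# NOTE(review): Arch-Router is not declared under model_providers in this file - confirm whether it is built in or needs an entry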
|
|
|
|
|
|
llm_routing_model: Arch-Router
|
|
|
|
|
|
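# Model used for agent orchestration (must be listed in model_providers)
# NOTE(review): Plano-Orchestrator is not declared under model_providers in this file - confirm whether it is built in or needs an entry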
|
|
|
|
|
|
agent_orchestration_model: Plano-Orchestrator
|
|
|
|
|
|
|
2026-04-08 17:32:02 -07:00
|
|
|
|
# Model affinity — pin routing decisions for agentic loops
|
|
|
|
|
|
routing:
|
|
|
|
|
|
session_ttl_seconds: 600 # How long a pinned session lasts (default: 600s / 10 min)
|
|
|
|
|
|
session_max_entries: 10000 # Max cached sessions before eviction (upper limit: 10000)
|
|
|
|
|
|
|
2026-03-30 15:25:05 -04:00
|
|
|
|
# State storage for multi-turn conversation history
|
|
|
|
|
|
state_storage:
|
|
|
|
|
|
type: memory # "memory" (in-process) or "postgres" (persistent)
|
|
|
|
|
|
# connection_string is required when type is postgres.
|
|
|
|
|
|
# Supports environment variable substitution: $VAR or ${VAR}
|
|
|
|
|
|
# connection_string: postgresql://user:$DB_PASS@localhost:5432/plano
|
|
|
|
|
|
|
|
|
|
|
|
# Input guardrails applied globally to all incoming requests
|
|
|
|
|
|
prompt_guards:
|
|
|
|
|
|
input_guards:
|
|
|
|
|
|
jailbreak:
|
|
|
|
|
|
on_exception:
|
|
|
|
|
|
message: "I'm sorry, I can't help with that request."
|
2024-10-08 13:18:34 -07:00
|
|
|
|
|
2025-12-23 17:14:50 -08:00
|
|
|
|
# OpenTelemetry tracing configuration
|
2024-10-08 16:24:08 -07:00
|
|
|
|
tracing:
|
2025-12-23 17:14:50 -08:00
|
|
|
|
# Random sampling percentage (1-100)
|
|
|
|
|
|
random_sampling: 100
|
2026-03-30 15:25:05 -04:00
|
|
|
|
# Include internal Plano spans in traces (the key keeps its legacy "arch" name)
|
|
|
|
|
|
trace_arch_internal: false
|
|
|
|
|
|
# gRPC endpoint for OpenTelemetry collector (e.g., Jaeger, Tempo)
|
|
|
|
|
|
opentracing_grpc_endpoint: http://localhost:4317
|
|
|
|
|
|
span_attributes:
|
|
|
|
|
|
# Propagate request headers whose names start with these prefixes as span attributes
|
|
|
|
|
|
header_prefixes:
|
|
|
|
|
|
- x-user-
|
|
|
|
|
|
- x-org-
|
|
|
|
|
|
# Static key/value pairs added to every span
|
|
|
|
|
|
static:
|
|
|
|
|
|
environment: production
|
|
|
|
|
|
service.team: platform
|