diff --git a/docs/source/resources/includes/plano_config_full_reference.yaml b/docs/source/resources/includes/plano_config_full_reference.yaml index 6d76224a..452bc17a 100644 --- a/docs/source/resources/includes/plano_config_full_reference.yaml +++ b/docs/source/resources/includes/plano_config_full_reference.yaml @@ -36,35 +36,20 @@ model_providers: # can select the best model for each request based on intent. Requires the # Arch-Router model (or equivalent) to be configured in overrides.llm_routing_model. # Each preference has a name (short label) and a description (used for intent matching). - - model: openai/gpt-4o - name: gpt-4o-coding # Optional friendly name to distinguish multiple entries for same model - access_key: $OPENAI_API_KEY + - model: groq/llama-3.3-70b-versatile + access_key: $GROQ_API_KEY routing_preferences: - name: code generation description: generating new code snippets, functions, or boilerplate based on user prompts or requirements - name: code review description: reviewing, analyzing, and suggesting improvements to existing code - - model: anthropic/claude-sonnet-4-0 - name: claude-sonnet-reasoning - access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: reasoning - description: complex multi-step reasoning, math, logic puzzles, and analytical tasks - # passthrough_auth: forwards the client's Authorization header upstream instead of # using the configured access_key. Useful for LiteLLM or similar proxy setups. - model: openai/gpt-4o-litellm base_url: https://litellm.example.com passthrough_auth: true - # provider_interface: specifies the API format when the provider doesn't match - # the default inferred from the model name. Supported: openai, claude, gemini, - # mistral, groq, deepseek, plano - - model: groq/llama-3.3-70b-versatile - access_key: $GROQ_API_KEY - provider_interface: groq - # Custom/self-hosted endpoint with explicit http_host override - model: openai/llama-3.3-70b base_url: https://api.custom-provider.com @@ -179,7 +164,7 @@ overrides: # Trim conversation history to fit within the model's context window optimize_context_window: true # Use Plano's agent orchestrator for multi-agent request routing - use_agent_orchestrator: true + use_agent_orchestrator: false # Connect timeout for upstream provider clusters (e.g., "5s", "10s"). Default: "5s" upstream_connect_timeout: 10s # Path to the trusted CA bundle for upstream TLS verification diff --git a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml index f63ab831..c4a17762 100644 --- a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml @@ -8,6 +8,7 @@ endpoints: connect_timeout: 0.005s endpoint: 127.0.0.1 port: 80 + protocol: http flight_agent: endpoint: localhost port: 10520 @@ -19,6 +20,11 @@ endpoints: mistral_local: endpoint: 127.0.0.1 port: 8001 + secure_service: + endpoint: api.example.com + http_host: api.example.com + port: 443 + protocol: https weather_agent: endpoint: localhost port: 10510 @@ -38,6 +44,9 @@ listeners: router: plano_orchestrator_v1 type: agent - address: 0.0.0.0 + input_filters: + - input_guards + max_retries: 3 model_providers: - access_key: $OPENAI_API_KEY default: true @@ -56,6 +65,16 @@ listeners: model: ministral-3b-latest name: mistral/ministral-3b-latest provider_interface: mistral + - access_key: $GROQ_API_KEY + model: llama-3.3-70b-versatile + name: groq/llama-3.3-70b-versatile + provider_interface: groq + routing_preferences: + - description: generating new code snippets, functions, or boilerplate based on + user prompts or requirements + name: code generation + - description: reviewing, analyzing, and suggesting improvements to existing code + name: code review - base_url: https://litellm.example.com cluster_name: openai_litellm.example.com endpoint: litellm.example.com @@ -65,8 +84,21 @@ listeners: port: 443 protocol: https provider_interface: openai + - access_key: $CUSTOM_API_KEY + base_url: https://api.custom-provider.com + cluster_name: openai_api.custom-provider.com + endpoint: api.custom-provider.com + http_host: api.custom-provider.com + model: llama-3.3-70b + name: openai/llama-3.3-70b + port: 443 + protocol: https + provider_interface: openai name: model_1 + output_filters: + - input_guards port: 12000 + timeout: 30s type: model - address: 0.0.0.0 name: prompt_function_listener @@ -95,6 +127,16 @@ model_providers: model: ministral-3b-latest name: mistral/ministral-3b-latest provider_interface: mistral +- access_key: $GROQ_API_KEY + model: llama-3.3-70b-versatile + name: groq/llama-3.3-70b-versatile + provider_interface: groq + routing_preferences: + - description: generating new code snippets, functions, or boilerplate based on + user prompts or requirements + name: code generation + - description: reviewing, analyzing, and suggesting improvements to existing code + name: code review - base_url: https://litellm.example.com cluster_name: openai_litellm.example.com endpoint: litellm.example.com @@ -104,6 +146,20 @@ model_providers: port: 443 protocol: https provider_interface: openai +- access_key: $CUSTOM_API_KEY + base_url: https://api.custom-provider.com + cluster_name: openai_api.custom-provider.com + endpoint: api.custom-provider.com + http_host: api.custom-provider.com + model: llama-3.3-70b + name: openai/llama-3.3-70b + port: 443 + protocol: https + provider_interface: openai +- internal: true + model: Arch-Router + name: arch-router + provider_interface: plano - internal: true model: Arch-Function name: arch-function @@ -112,8 +168,22 @@ model_providers: model: Plano-Orchestrator name: plano/orchestrator provider_interface: plano +overrides: + agent_orchestration_model: Plano-Orchestrator + llm_routing_model: Arch-Router + optimize_context_window: true + prompt_target_intent_matching_threshold: 0.7 + upstream_connect_timeout: 10s + upstream_tls_ca_path: /etc/ssl/certs/ca-certificates.crt + use_agent_orchestrator: false +prompt_guards: + input_guards: + jailbreak: + on_exception: + message: I'm sorry, I can't help with that request. prompt_targets: -- description: Get current weather at a location. +- auto_llm_dispatch_on_response: true + description: Get current weather at a location. endpoint: http_method: POST name: app_server @@ -129,7 +199,36 @@ prompt_targets: name: days required: true type: int + system_prompt: You are a weather expert. Provide accurate and concise weather information. +ratelimits: +- limit: + tokens: 100000 + unit: hour + model: openai/gpt-4o + selector: + key: x-user-id + value: '*' +- limit: + tokens: 500000 + unit: day + model: openai/gpt-4o-mini + selector: + key: x-org-id + value: acme-corp +state_storage: + type: memory +system_prompt: 'You are a helpful assistant. Always respond concisely and accurately. + + ' tracing: opentracing_grpc_endpoint: http://localhost:4317 random_sampling: 100 + span_attributes: + header_prefixes: + - x-user- + - x-org- + static: + environment: production + service.team: platform + trace_arch_internal: false version: v0.3.0