diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py
index 522968c9..347de5d6 100644
--- a/cli/planoai/config_generator.py
+++ b/cli/planoai/config_generator.py
@@ -185,6 +185,51 @@ def validate_and_render_schema():
                 f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty"
             )
 
+    # A bare "routing:" key loads as None; treat it like a missing section.
+    routing = config_yaml.get("routing") or {}
+    policy_provider = routing.get("policy_provider")
+    if policy_provider:
+        policy_url = policy_provider.get("url")
+        if not policy_url:
+            raise Exception(
+                "routing.policy_provider.url is required when policy_provider is set"
+            )
+        # Validate the env-expanded URL; error messages keep the value as written.
+        if "$" in policy_url:
+            policy_url = os.path.expandvars(policy_url)
+        policy_url_result = urlparse(policy_url)
+        if (
+            policy_url_result.scheme not in ["http", "https"]
+            or not policy_url_result.hostname
+        ):
+            raise Exception(
+                f"Invalid routing.policy_provider.url {policy_provider.get('url')}, must be a valid http/https URL"
+            )
+
+        ttl_seconds = policy_provider.get("ttl_seconds")
+        if ttl_seconds is not None:
+            # Check the type first so a quoted value such as "300" is reported
+            # as a config error instead of raising TypeError on the comparison.
+            # bool is excluded explicitly because it is a subclass of int.
+            if isinstance(ttl_seconds, bool) or not isinstance(ttl_seconds, int):
+                raise Exception(
+                    "routing.policy_provider.ttl_seconds must be an integer"
+                )
+            if ttl_seconds <= 0:
+                raise Exception(
+                    "routing.policy_provider.ttl_seconds must be greater than 0"
+                )
+
+        headers = policy_provider.get("headers")
+        if headers is not None:
+            if not isinstance(headers, dict):
+                raise Exception("routing.policy_provider.headers must be an object")
+            for key, value in headers.items():
+                if not isinstance(key, str) or not isinstance(value, str):
+                    raise Exception(
+                        "routing.policy_provider.headers must contain string keys and string values"
+                    )
+
     llms_with_endpoint = []
     llms_with_endpoint_cluster_names = set()
     updated_model_providers = []
diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml
index b63cb824..e202a705 100644
--- a/config/plano_config_schema.yaml
+++ b/config/plano_config_schema.yaml
@@ -411,10 +411,27 @@ properties:
   routing:
     type: object
     properties:
+      model_provider:
+        type: string
       llm_provider:
         type: string
       model:
         type: string
+      policy_provider:
+        type: object
+        properties:
+          url:
+            type: string
+          headers:
+            type: object
+            additionalProperties:
+              type: string
+          ttl_seconds:
+            type: integer
+            minimum: 1
+        additionalProperties: false
+        required:
+          - url
     additionalProperties: false
   state_storage:
     type: object
diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst
index 188b1e30..d6a60942 100644
--- a/docs/source/guides/llm_router.rst
+++ b/docs/source/guides/llm_router.rst
@@ -193,6 +193,65 @@ Clients can let the router decide or still specify aliases:
         # No model specified - router will analyze and choose claude-sonnet-4-5
     )
 
+External Policy Provider (policy_id)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For multitenant deployments, Plano can fetch routing preferences from an external HTTP endpoint using a ``policy_id`` provided by the caller.
+
+Resolution order is:
+
+1. Inline ``routing_policy`` in request payload
+2. ``policy_id`` lookup via ``routing.policy_provider``
+3. Metadata ``plano_preference_config``
+4. Config-file ``routing_preferences``
+
+.. code-block:: yaml
+    :caption: External Policy Provider Configuration
+
+    routing:
+      model_provider: arch-router
+      model: Arch-Router
+      policy_provider:
+        url: https://my-service.internal/v1/routing-policy
+        headers:
+          Authorization: Bearer $POLICY_API_KEY
+        ttl_seconds: 300
+
+When ``policy_id`` is provided and no inline ``routing_policy`` is present, Plano fetches:
+
+.. code-block:: text
+
+    GET https://my-service.internal/v1/routing-policy?policy_id=customer-abc-123
+
+.. code-block:: json
+    :caption: Routing request with policy_id
+
+    {
+      "messages": [{"role": "user", "content": "Help me summarize this"}],
+      "policy_id": "customer-abc-123"
+    }
+
+.. code-block:: json
+    :caption: Expected response from external policy endpoint
+
+    {
+      "policy_id": "customer-abc-123",
+      "routing_preferences": [
+        {
+          "model": "openai/gpt-4o",
+          "routing_preferences": [
+            {"name": "quick response", "description": "fast lightweight responses"}
+          ]
+        },
+        {
+          "model": "anthropic/claude-sonnet-4-0",
+          "routing_preferences": [
+            {"name": "deep analysis", "description": "comprehensive detailed analysis"}
+          ]
+        }
+      ]
+    }
+
 Arch-Router
 -----------
 
diff --git a/docs/source/resources/includes/plano_config_full_reference.yaml b/docs/source/resources/includes/plano_config_full_reference.yaml
index a650baea..f420d1c4 100644
--- a/docs/source/resources/includes/plano_config_full_reference.yaml
+++ b/docs/source/resources/includes/plano_config_full_reference.yaml
@@ -47,6 +47,18 @@ model_aliases:
   smart-llm:
     target: gpt-4o
 
+# Optional routing policy provider for multitenant preference-based routing.
+# If policy_id is included in the request and inline routing_policy is absent,
+# Plano fetches routing preferences from this endpoint and caches by policy_id.
+routing:
+  model_provider: arch-router
+  model: Arch-Router
+  policy_provider:
+    url: https://my-service.internal/v1/routing-policy
+    headers:
+      Authorization: Bearer $POLICY_API_KEY
+    ttl_seconds: 300
+
 # HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
 listeners:
   # Agent listener for routing requests to multiple agents