mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
feat: routing configuration with external policy provider support
This commit is contained in:
parent
5aeb69e034
commit
2c643532ef
4 changed files with 123 additions and 0 deletions
|
|
@ -185,6 +185,41 @@ def validate_and_render_schema():
|
||||||
f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty"
|
f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
routing = config_yaml.get("routing", {})
|
||||||
|
policy_provider = routing.get("policy_provider")
|
||||||
|
if policy_provider:
|
||||||
|
policy_url = policy_provider.get("url")
|
||||||
|
if not policy_url:
|
||||||
|
raise Exception(
|
||||||
|
"routing.policy_provider.url is required when policy_provider is set"
|
||||||
|
)
|
||||||
|
if "$" in policy_url:
|
||||||
|
policy_url = os.path.expandvars(policy_url)
|
||||||
|
policy_url_result = urlparse(policy_url)
|
||||||
|
if (
|
||||||
|
policy_url_result.scheme not in ["http", "https"]
|
||||||
|
or not policy_url_result.hostname
|
||||||
|
):
|
||||||
|
raise Exception(
|
||||||
|
f"Invalid routing.policy_provider.url {policy_provider.get('url')}, must be a valid http/https URL"
|
||||||
|
)
|
||||||
|
|
||||||
|
ttl_seconds = policy_provider.get("ttl_seconds")
|
||||||
|
if ttl_seconds is not None and ttl_seconds <= 0:
|
||||||
|
raise Exception(
|
||||||
|
"routing.policy_provider.ttl_seconds must be greater than 0"
|
||||||
|
)
|
||||||
|
|
||||||
|
headers = policy_provider.get("headers")
|
||||||
|
if headers is not None:
|
||||||
|
if not isinstance(headers, dict):
|
||||||
|
raise Exception("routing.policy_provider.headers must be an object")
|
||||||
|
for key, value in headers.items():
|
||||||
|
if not isinstance(key, str) or not isinstance(value, str):
|
||||||
|
raise Exception(
|
||||||
|
"routing.policy_provider.headers must contain string keys and string values"
|
||||||
|
)
|
||||||
|
|
||||||
llms_with_endpoint = []
|
llms_with_endpoint = []
|
||||||
llms_with_endpoint_cluster_names = set()
|
llms_with_endpoint_cluster_names = set()
|
||||||
updated_model_providers = []
|
updated_model_providers = []
|
||||||
|
|
|
||||||
|
|
@ -411,10 +411,27 @@ properties:
|
||||||
routing:
|
routing:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
model_provider:
|
||||||
|
type: string
|
||||||
llm_provider:
|
llm_provider:
|
||||||
type: string
|
type: string
|
||||||
model:
|
model:
|
||||||
type: string
|
type: string
|
||||||
|
policy_provider:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
url:
|
||||||
|
type: string
|
||||||
|
headers:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
ttl_seconds:
|
||||||
|
type: integer
|
||||||
|
minimum: 1
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- url
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
state_storage:
|
state_storage:
|
||||||
type: object
|
type: object
|
||||||
|
|
|
||||||
|
|
@ -193,6 +193,65 @@ Clients can let the router decide or still specify aliases:
|
||||||
# No model specified - router will analyze and choose claude-sonnet-4-5
|
# No model specified - router will analyze and choose claude-sonnet-4-5
|
||||||
)
|
)
|
||||||
|
|
||||||
|
External Policy Provider (policy_id)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
For multitenant deployments, Plano can fetch routing preferences from an external HTTP endpoint using a ``policy_id`` provided by the caller.
|
||||||
|
|
||||||
|
Plano resolves routing preferences from the following sources, in order of precedence:
|
||||||
|
|
||||||
|
1. Inline ``routing_policy`` in request payload
|
||||||
|
2. ``policy_id`` lookup via ``routing.policy_provider``
|
||||||
|
3. Metadata ``plano_preference_config``
|
||||||
|
4. Config-file ``routing_preferences``
|
||||||
|
|
||||||
|
.. code-block:: yaml
|
||||||
|
:caption: External Policy Provider Configuration
|
||||||
|
|
||||||
|
routing:
|
||||||
|
model_provider: arch-router
|
||||||
|
model: Arch-Router
|
||||||
|
policy_provider:
|
||||||
|
url: https://my-service.internal/v1/routing-policy
|
||||||
|
headers:
|
||||||
|
Authorization: Bearer $POLICY_API_KEY
|
||||||
|
ttl_seconds: 300
|
||||||
|
|
||||||
|
When ``policy_id`` is provided and no inline ``routing_policy`` is present, Plano sends a ``GET`` request to the configured ``url`` with ``policy_id`` as a query parameter:
|
||||||
|
|
||||||
|
.. code-block:: text
|
||||||
|
|
||||||
|
GET https://my-service.internal/v1/routing-policy?policy_id=customer-abc-123
|
||||||
|
|
||||||
|
.. code-block:: json
|
||||||
|
:caption: Routing request with policy_id
|
||||||
|
|
||||||
|
{
|
||||||
|
"messages": [{"role": "user", "content": "Help me summarize this"}],
|
||||||
|
"policy_id": "customer-abc-123"
|
||||||
|
}
|
||||||
|
|
||||||
|
.. code-block:: json
|
||||||
|
:caption: Expected response from external policy endpoint
|
||||||
|
|
||||||
|
{
|
||||||
|
"policy_id": "customer-abc-123",
|
||||||
|
"routing_preferences": [
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-4o",
|
||||||
|
"routing_preferences": [
|
||||||
|
{"name": "quick response", "description": "fast lightweight responses"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "anthropic/claude-sonnet-4-0",
|
||||||
|
"routing_preferences": [
|
||||||
|
{"name": "deep analysis", "description": "comprehensive detailed analysis"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Arch-Router
|
Arch-Router
|
||||||
-----------
|
-----------
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,18 @@ model_aliases:
|
||||||
smart-llm:
|
smart-llm:
|
||||||
target: gpt-4o
|
target: gpt-4o
|
||||||
|
|
||||||
|
# Optional routing policy provider for multitenant preference-based routing.
|
||||||
|
# If policy_id is included in the request and inline routing_policy is absent,
|
||||||
|
# Plano fetches routing preferences from this endpoint and caches them by policy_id.
|
||||||
|
routing:
|
||||||
|
model_provider: arch-router
|
||||||
|
model: Arch-Router
|
||||||
|
policy_provider:
|
||||||
|
url: https://my-service.internal/v1/routing-policy
|
||||||
|
headers:
|
||||||
|
Authorization: Bearer $POLICY_API_KEY
|
||||||
|
ttl_seconds: 300
|
||||||
|
|
||||||
# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
|
# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
|
||||||
listeners:
|
listeners:
|
||||||
# Agent listener for routing requests to multiple agents
|
# Agent listener for routing requests to multiple agents
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue