mirror of
https://github.com/katanemo/plano.git
synced 2026-05-27 14:17:15 +02:00
feat: add provider arbitrage policy and fallback routing
This commit is contained in:
parent
de2d8847f3
commit
07ad4c6ae2
10 changed files with 670 additions and 57 deletions
|
|
@ -191,6 +191,7 @@ def validate_and_render_schema():
|
|||
llms_with_usage = []
|
||||
model_name_keys = set()
|
||||
model_usage_name_keys = set()
|
||||
arbitrage_rank_validations = []
|
||||
|
||||
print("listeners: ", listeners)
|
||||
|
||||
|
|
@ -254,6 +255,30 @@ def validate_and_render_schema():
|
|||
f"Model {model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards."
|
||||
)
|
||||
|
||||
arbitrage_policy = model_provider.get("arbitrage_policy")
|
||||
if arbitrage_policy:
|
||||
arbitrage_enabled = arbitrage_policy.get("enabled", False)
|
||||
arbitrage_rank = arbitrage_policy.get("rank", [])
|
||||
|
||||
if arbitrage_enabled and len(arbitrage_rank) == 0:
|
||||
raise Exception(
|
||||
f"Model {model_name} has arbitrage_policy.enabled=true but rank is empty. Please provide at least one ranked candidate."
|
||||
)
|
||||
|
||||
if arbitrage_enabled and is_wildcard:
|
||||
raise Exception(
|
||||
f"Model {model_name} has arbitrage_policy.enabled=true but uses wildcard (*). Arbitrage policy requires deterministic model candidates."
|
||||
)
|
||||
|
||||
if len(arbitrage_rank) != len(set(arbitrage_rank)):
|
||||
raise Exception(
|
||||
f"Model {model_name} has duplicate entries in arbitrage_policy.rank. Please provide each candidate once in ranked order."
|
||||
)
|
||||
|
||||
if arbitrage_enabled:
|
||||
provider_label = model_provider.get("name") or model_name
|
||||
arbitrage_rank_validations.append((provider_label, arbitrage_rank))
|
||||
|
||||
# Validate that azure_openai and ollama providers specify a base_url
|
||||
if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get(
|
||||
"base_url"
|
||||
|
|
@ -417,6 +442,16 @@ def validate_and_render_schema():
|
|||
}
|
||||
)
|
||||
|
||||
arbitrage_allowed_targets = model_name_keys.union(model_provider_name_set)
|
||||
for provider_name, rank in arbitrage_rank_validations:
|
||||
for ranked_candidate in rank:
|
||||
if ranked_candidate not in arbitrage_allowed_targets:
|
||||
raise Exception(
|
||||
f"Model provider '{provider_name}' has arbitrage_policy.rank candidate '{ranked_candidate}' "
|
||||
"that is not defined in model_providers. "
|
||||
"Use a configured provider name, model id, or provider/model slug."
|
||||
)
|
||||
|
||||
config_yaml["model_providers"] = deepcopy(updated_model_providers)
|
||||
|
||||
listeners_with_provider = 0
|
||||
|
|
|
|||
|
|
@ -289,6 +289,107 @@ llm_providers:
|
|||
tracing:
|
||||
random_sampling: 100
|
||||
|
||||
""",
|
||||
},
|
||||
{
|
||||
"id": "arbitrage_policy_enabled_requires_non_empty_rank",
|
||||
"expected_error": "arbitrage_policy.enabled=true but rank is empty",
|
||||
"plano_config": """
|
||||
version: v0.1.0
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
arbitrage_policy:
|
||||
enabled: true
|
||||
rank: []
|
||||
""",
|
||||
},
|
||||
{
|
||||
"id": "arbitrage_policy_rank_candidate_must_exist",
|
||||
"expected_error": "arbitrage_policy.rank candidate 'openai/not-configured'",
|
||||
"plano_config": """
|
||||
version: v0.1.0
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
arbitrage_policy:
|
||||
enabled: true
|
||||
rank:
|
||||
- openai/not-configured
|
||||
""",
|
||||
},
|
||||
{
|
||||
"id": "arbitrage_policy_rejects_duplicate_rank_entries",
|
||||
"expected_error": "duplicate entries in arbitrage_policy.rank",
|
||||
"plano_config": """
|
||||
version: v0.1.0
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
arbitrage_policy:
|
||||
enabled: true
|
||||
rank:
|
||||
- openai/gpt-4o-mini
|
||||
- openai/gpt-4o-mini
|
||||
""",
|
||||
},
|
||||
{
|
||||
"id": "arbitrage_policy_valid_rank",
|
||||
"expected_error": None,
|
||||
"plano_config": """
|
||||
version: v0.1.0
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
- model: groq/llama-3.1-8b-instant
|
||||
access_key: $GROQ_API_KEY
|
||||
arbitrage_policy:
|
||||
enabled: true
|
||||
rank:
|
||||
- openai/gpt-4o-mini
|
||||
- openai/gpt-4o
|
||||
on_failure:
|
||||
fallback_to_primary: true
|
||||
""",
|
||||
},
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue