mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-22 21:28:12 +02:00
feat(chat): route models by provider capabilities
This commit is contained in:
parent
8f20a32571
commit
c28c4f5785
18 changed files with 429 additions and 319 deletions
|
|
@ -75,10 +75,10 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
|
|||
config,
|
||||
"GLOBAL_LLM_CONFIGS",
|
||||
[
|
||||
{"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
|
||||
{"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-prem",
|
||||
"api_key": "k2",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -117,7 +117,7 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-free",
|
||||
"api_key": "k1",
|
||||
"billing_tier": "free",
|
||||
|
|
@ -125,7 +125,7 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
|
|||
},
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-prem",
|
||||
"api_key": "k2",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -164,7 +164,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "gpt-5.1",
|
||||
"api_key": "k1",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -173,7 +173,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
|
|||
},
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "gpt-5.4",
|
||||
"api_key": "k2",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -182,7 +182,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
|
|||
},
|
||||
{
|
||||
"id": -3,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "openai/gpt-5.4",
|
||||
"api_key": "k3",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -222,7 +222,7 @@ async def test_next_turn_reuses_existing_pin(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-prem",
|
||||
"api_key": "k2",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -263,7 +263,7 @@ async def test_premium_eligible_auto_can_pin_premium(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-prem",
|
||||
"api_key": "k2",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -301,14 +301,14 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-free",
|
||||
"api_key": "k1",
|
||||
"billing_tier": "free",
|
||||
},
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-prem",
|
||||
"api_key": "k2",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -346,14 +346,14 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-free",
|
||||
"api_key": "k1",
|
||||
"billing_tier": "free",
|
||||
},
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-prem",
|
||||
"api_key": "k2",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -391,14 +391,14 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-free",
|
||||
"api_key": "k1",
|
||||
"billing_tier": "free",
|
||||
},
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-prem",
|
||||
"api_key": "k2",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -437,7 +437,7 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
|
|||
config,
|
||||
"GLOBAL_LLM_CONFIGS",
|
||||
[
|
||||
{"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
|
||||
{"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
|
||||
],
|
||||
)
|
||||
|
||||
|
|
@ -462,7 +462,7 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
|
|||
config,
|
||||
"GLOBAL_LLM_CONFIGS",
|
||||
[
|
||||
{"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
|
||||
{"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
|
||||
],
|
||||
)
|
||||
|
||||
|
|
@ -504,7 +504,7 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "venice/dead-model",
|
||||
"api_key": "k1",
|
||||
"billing_tier": "free",
|
||||
|
|
@ -514,7 +514,7 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
|
|||
},
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "google/gemini-flash",
|
||||
"api_key": "k1",
|
||||
"billing_tier": "free",
|
||||
|
|
@ -556,7 +556,7 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "gpt-5",
|
||||
"api_key": "k-yaml",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -566,7 +566,7 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
|
|||
},
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "openai/gpt-5",
|
||||
"api_key": "k-or",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -608,7 +608,7 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "gpt-5",
|
||||
"api_key": "k-yaml",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -618,7 +618,7 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
|
|||
},
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "google/gemini-flash:free",
|
||||
"api_key": "k-or",
|
||||
"billing_tier": "free",
|
||||
|
|
@ -656,7 +656,7 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
|
|||
high_score_cfgs = [
|
||||
{
|
||||
"id": -i,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": f"gpt-x-{i}",
|
||||
"api_key": "k",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -668,7 +668,7 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
|
|||
]
|
||||
low_score_trap = {
|
||||
"id": -99,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "tiny-legacy",
|
||||
"api_key": "k",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -729,7 +729,7 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "venice/dead-model",
|
||||
"api_key": "k",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -739,7 +739,7 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
|
|||
},
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "gpt-5",
|
||||
"api_key": "k",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -781,7 +781,7 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "gpt-5",
|
||||
"api_key": "k",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -791,7 +791,7 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
|
|||
},
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "gpt-5-pro",
|
||||
"api_key": "k",
|
||||
"billing_tier": "premium",
|
||||
|
|
@ -839,7 +839,7 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "google/gemma-4-26b-a4b-it:free",
|
||||
"api_key": "k",
|
||||
"billing_tier": "free",
|
||||
|
|
@ -849,7 +849,7 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
|
|||
},
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "google/gemini-2.5-flash:free",
|
||||
"api_key": "k",
|
||||
"billing_tier": "free",
|
||||
|
|
@ -892,7 +892,7 @@ async def test_clearing_runtime_cooldown_restores_pin_reuse(monkeypatch):
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "google/gemma-4-26b-a4b-it:free",
|
||||
"api_key": "k",
|
||||
"billing_tier": "free",
|
||||
|
|
@ -937,7 +937,7 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
|
|||
[
|
||||
{
|
||||
"id": -1,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "google/gemma-4-26b-a4b-it:free",
|
||||
"api_key": "k",
|
||||
"billing_tier": "free",
|
||||
|
|
@ -947,7 +947,7 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
|
|||
},
|
||||
{
|
||||
"id": -2,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": "google/gemini-2.5-flash:free",
|
||||
"api_key": "k",
|
||||
"billing_tier": "free",
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ def _thread(*, pinned: int | None = None):
|
|||
def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
|
||||
return {
|
||||
"id": id_,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": f"vision-{id_}",
|
||||
"api_key": "k",
|
||||
"billing_tier": tier,
|
||||
|
|
@ -87,7 +87,7 @@ def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
|
|||
def _text_only_cfg(id_: int, *, tier: str = "free", quality: int = 90) -> dict:
|
||||
return {
|
||||
"id": id_,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": f"text-{id_}",
|
||||
"api_key": "k",
|
||||
"billing_tier": tier,
|
||||
|
|
@ -261,7 +261,7 @@ async def test_image_turn_unannotated_cfg_resolves_via_helper(monkeypatch):
|
|||
session = _FakeSession(_thread())
|
||||
cfg_unannotated_vision = {
|
||||
"id": -2,
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": "gpt-4o", # known vision model in LiteLLM map
|
||||
"api_key": "k",
|
||||
"billing_tier": "free",
|
||||
|
|
|
|||
|
|
@ -25,10 +25,10 @@ def _fake_yaml_config(
|
|||
return {
|
||||
"id": id,
|
||||
"name": f"yaml-{id}",
|
||||
"provider": "OPENAI",
|
||||
"litellm_provider": "openai",
|
||||
"model_name": model_name,
|
||||
"api_key": "sk-test",
|
||||
"api_base": "",
|
||||
"api_base": "https://api.openai.com/v1",
|
||||
"billing_tier": billing_tier,
|
||||
"rpm": 100,
|
||||
"tpm": 100_000,
|
||||
|
|
@ -54,10 +54,10 @@ def _fake_openrouter_config(
|
|||
return {
|
||||
"id": id,
|
||||
"name": f"or-{id}",
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": model_name,
|
||||
"api_key": "sk-or-test",
|
||||
"api_base": "",
|
||||
"api_base": "https://openrouter.ai/api/v1",
|
||||
"billing_tier": billing_tier,
|
||||
"rpm": 20 if billing_tier == "free" else 200,
|
||||
"tpm": 100_000 if billing_tier == "free" else 1_000_000,
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ def _or_cfg(
|
|||
) -> dict:
|
||||
return {
|
||||
"id": cid,
|
||||
"provider": "OPENROUTER",
|
||||
"litellm_provider": "openrouter",
|
||||
"model_name": model_name,
|
||||
"billing_tier": tier,
|
||||
"auto_pin_tier": "B" if tier == "premium" else "C",
|
||||
|
|
@ -144,7 +144,7 @@ async def test_enrich_health_only_touches_or_provider(monkeypatch):
|
|||
"""YAML cfgs that aren't OPENROUTER must be skipped entirely."""
|
||||
yaml_cfg = {
|
||||
"id": -1,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "gpt-5",
|
||||
"billing_tier": "premium",
|
||||
"auto_pin_tier": "A",
|
||||
|
|
@ -313,7 +313,7 @@ async def test_enrich_health_no_or_cfgs_is_noop(monkeypatch):
|
|||
"""When the catalogue has no OR cfgs at all, no HTTP calls fire."""
|
||||
yaml_cfg: dict[str, Any] = {
|
||||
"id": -1,
|
||||
"provider": "AZURE_OPENAI",
|
||||
"litellm_provider": "azure",
|
||||
"model_name": "gpt-5",
|
||||
"billing_tier": "premium",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ def test_safety_net_does_not_fire_for_azure_gpt_4o():
|
|||
it text-only."""
|
||||
assert (
|
||||
is_known_text_only_chat_model(
|
||||
provider="AZURE_OPENAI",
|
||||
litellm_provider="azure",
|
||||
model_name="my-azure-deployment",
|
||||
base_model="gpt-4o",
|
||||
)
|
||||
|
|
@ -49,7 +49,7 @@ def test_safety_net_does_not_fire_for_unknown_model():
|
|||
LiteLLM doesn't know about must flow through to the provider."""
|
||||
assert (
|
||||
is_known_text_only_chat_model(
|
||||
provider="CUSTOM",
|
||||
litellm_provider="custom",
|
||||
custom_provider="brand_new_proxy",
|
||||
model_name="brand-new-model-x9",
|
||||
)
|
||||
|
|
@ -69,7 +69,7 @@ def test_safety_net_does_not_fire_when_lookup_raises(monkeypatch):
|
|||
|
||||
assert (
|
||||
is_known_text_only_chat_model(
|
||||
provider="OPENAI",
|
||||
litellm_provider="openai",
|
||||
model_name="gpt-4o",
|
||||
)
|
||||
is False
|
||||
|
|
@ -88,7 +88,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
|
|||
monkeypatch.setattr(pc.litellm, "get_model_info", _info_explicit_false)
|
||||
assert (
|
||||
is_known_text_only_chat_model(
|
||||
provider="OPENAI",
|
||||
litellm_provider="openai",
|
||||
model_name="text-only-stub",
|
||||
)
|
||||
is True
|
||||
|
|
@ -100,7 +100,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
|
|||
monkeypatch.setattr(pc.litellm, "get_model_info", _info_true)
|
||||
assert (
|
||||
is_known_text_only_chat_model(
|
||||
provider="OPENAI",
|
||||
litellm_provider="openai",
|
||||
model_name="vision-stub",
|
||||
)
|
||||
is False
|
||||
|
|
@ -112,7 +112,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
|
|||
monkeypatch.setattr(pc.litellm, "get_model_info", _info_missing)
|
||||
assert (
|
||||
is_known_text_only_chat_model(
|
||||
provider="OPENAI",
|
||||
litellm_provider="openai",
|
||||
model_name="missing-key-stub",
|
||||
)
|
||||
is False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue