feat(chat): route models by provider capabilities

2026-06-22 21:28:12 +02:00 · 2026-06-11 18:22:23 +05:30 · 2026-06-11 18:22:23 +05:30 · c28c4f5785
commit c28c4f5785
parent 8f20a32571
18 changed files with 429 additions and 319 deletions
--- a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
+++ b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
@@ -75,10 +75,10 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
        config,
        "GLOBAL_LLM_CONFIGS",
        [
-            {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
+            {"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@@ -117,7 +117,7 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
        [
            {
                "id": -2,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-free",
                "api_key": "k1",
                "billing_tier": "free",
@@ -125,7 +125,7 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
            },
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@@ -164,7 +164,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
        [
            {
                "id": -1,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5.1",
                "api_key": "k1",
                "billing_tier": "premium",
@@ -173,7 +173,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5.4",
                "api_key": "k2",
                "billing_tier": "premium",
@@ -182,7 +182,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
            },
            {
                "id": -3,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "openai/gpt-5.4",
                "api_key": "k3",
                "billing_tier": "premium",
@@ -222,7 +222,7 @@ async def test_next_turn_reuses_existing_pin(monkeypatch):
        [
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@@ -263,7 +263,7 @@ async def test_premium_eligible_auto_can_pin_premium(monkeypatch):
        [
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@@ -301,14 +301,14 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
        [
            {
                "id": -2,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-free",
                "api_key": "k1",
                "billing_tier": "free",
            },
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@@ -346,14 +346,14 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
        [
            {
                "id": -2,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-free",
                "api_key": "k1",
                "billing_tier": "free",
            },
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@@ -391,14 +391,14 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
        [
            {
                "id": -2,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-free",
                "api_key": "k1",
                "billing_tier": "free",
            },
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@@ -437,7 +437,7 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
        config,
        "GLOBAL_LLM_CONFIGS",
        [
-            {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
+            {"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
        ],
    )

@@ -462,7 +462,7 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
        config,
        "GLOBAL_LLM_CONFIGS",
        [
-            {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
+            {"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
        ],
    )

@@ -504,7 +504,7 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "venice/dead-model",
                "api_key": "k1",
                "billing_tier": "free",
@@ -514,7 +514,7 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemini-flash",
                "api_key": "k1",
                "billing_tier": "free",
@@ -556,7 +556,7 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
        [
            {
                "id": -1,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5",
                "api_key": "k-yaml",
                "billing_tier": "premium",
@@ -566,7 +566,7 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "openai/gpt-5",
                "api_key": "k-or",
                "billing_tier": "premium",
@@ -608,7 +608,7 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
        [
            {
                "id": -1,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5",
                "api_key": "k-yaml",
                "billing_tier": "premium",
@@ -618,7 +618,7 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemini-flash:free",
                "api_key": "k-or",
                "billing_tier": "free",
@@ -656,7 +656,7 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
    high_score_cfgs = [
        {
            "id": -i,
-            "provider": "AZURE_OPENAI",
+            "litellm_provider": "azure",
            "model_name": f"gpt-x-{i}",
            "api_key": "k",
            "billing_tier": "premium",
@@ -668,7 +668,7 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
    ]
    low_score_trap = {
        "id": -99,
-        "provider": "AZURE_OPENAI",
+        "litellm_provider": "azure",
        "model_name": "tiny-legacy",
        "api_key": "k",
        "billing_tier": "premium",
@@ -729,7 +729,7 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "venice/dead-model",
                "api_key": "k",
                "billing_tier": "premium",
@@ -739,7 +739,7 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5",
                "api_key": "k",
                "billing_tier": "premium",
@@ -781,7 +781,7 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
        [
            {
                "id": -1,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5",
                "api_key": "k",
                "billing_tier": "premium",
@@ -791,7 +791,7 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5-pro",
                "api_key": "k",
                "billing_tier": "premium",
@@ -839,7 +839,7 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemma-4-26b-a4b-it:free",
                "api_key": "k",
                "billing_tier": "free",
@@ -849,7 +849,7 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemini-2.5-flash:free",
                "api_key": "k",
                "billing_tier": "free",
@@ -892,7 +892,7 @@ async def test_clearing_runtime_cooldown_restores_pin_reuse(monkeypatch):
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemma-4-26b-a4b-it:free",
                "api_key": "k",
                "billing_tier": "free",
@@ -937,7 +937,7 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemma-4-26b-a4b-it:free",
                "api_key": "k",
                "billing_tier": "free",
@@ -947,7 +947,7 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemini-2.5-flash:free",
                "api_key": "k",
                "billing_tier": "free",
--- a/surfsense_backend/tests/unit/services/test_auto_pin_image_aware.py
+++ b/surfsense_backend/tests/unit/services/test_auto_pin_image_aware.py
@@ -74,7 +74,7 @@ def _thread(*, pinned: int | None = None):
 def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
    return {
        "id": id_,
-        "provider": "OPENAI",
+        "litellm_provider": "openai",
        "model_name": f"vision-{id_}",
        "api_key": "k",
        "billing_tier": tier,
@@ -87,7 +87,7 @@ def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
 def _text_only_cfg(id_: int, *, tier: str = "free", quality: int = 90) -> dict:
    return {
        "id": id_,
-        "provider": "OPENAI",
+        "litellm_provider": "openai",
        "model_name": f"text-{id_}",
        "api_key": "k",
        "billing_tier": tier,
@@ -261,7 +261,7 @@ async def test_image_turn_unannotated_cfg_resolves_via_helper(monkeypatch):
    session = _FakeSession(_thread())
    cfg_unannotated_vision = {
        "id": -2,
-        "provider": "OPENAI",
+        "litellm_provider": "openai",
        "model_name": "gpt-4o",  # known vision model in LiteLLM map
        "api_key": "k",
        "billing_tier": "free",
--- a/surfsense_backend/tests/unit/services/test_llm_router_pool_filter.py
+++ b/surfsense_backend/tests/unit/services/test_llm_router_pool_filter.py
@@ -25,10 +25,10 @@ def _fake_yaml_config(
    return {
        "id": id,
        "name": f"yaml-{id}",
-        "provider": "OPENAI",
+        "litellm_provider": "openai",
        "model_name": model_name,
        "api_key": "sk-test",
-        "api_base": "",
+        "api_base": "https://api.openai.com/v1",
        "billing_tier": billing_tier,
        "rpm": 100,
        "tpm": 100_000,
@@ -54,10 +54,10 @@ def _fake_openrouter_config(
    return {
        "id": id,
        "name": f"or-{id}",
-        "provider": "OPENROUTER",
+        "litellm_provider": "openrouter",
        "model_name": model_name,
        "api_key": "sk-or-test",
-        "api_base": "",
+        "api_base": "https://openrouter.ai/api/v1",
        "billing_tier": billing_tier,
        "rpm": 20 if billing_tier == "free" else 200,
        "tpm": 100_000 if billing_tier == "free" else 1_000_000,
--- a/surfsense_backend/tests/unit/services/test_or_health_enrichment.py
+++ b/surfsense_backend/tests/unit/services/test_or_health_enrichment.py
@@ -25,7 +25,7 @@ def _or_cfg(
 ) -> dict:
    return {
        "id": cid,
-        "provider": "OPENROUTER",
+        "litellm_provider": "openrouter",
        "model_name": model_name,
        "billing_tier": tier,
        "auto_pin_tier": "B" if tier == "premium" else "C",
@@ -144,7 +144,7 @@ async def test_enrich_health_only_touches_or_provider(monkeypatch):
    """YAML cfgs that aren't OPENROUTER must be skipped entirely."""
    yaml_cfg = {
        "id": -1,
-        "provider": "AZURE_OPENAI",
+        "litellm_provider": "azure",
        "model_name": "gpt-5",
        "billing_tier": "premium",
        "auto_pin_tier": "A",
@@ -313,7 +313,7 @@ async def test_enrich_health_no_or_cfgs_is_noop(monkeypatch):
    """When the catalogue has no OR cfgs at all, no HTTP calls fire."""
    yaml_cfg: dict[str, Any] = {
        "id": -1,
-        "provider": "AZURE_OPENAI",
+        "litellm_provider": "azure",
        "model_name": "gpt-5",
        "billing_tier": "premium",
    }
--- a/surfsense_backend/tests/unit/tasks/test_stream_new_chat_image_safety_net.py
+++ b/surfsense_backend/tests/unit/tasks/test_stream_new_chat_image_safety_net.py
@@ -35,7 +35,7 @@ def test_safety_net_does_not_fire_for_azure_gpt_4o():
    it text-only."""
    assert (
        is_known_text_only_chat_model(
-            provider="AZURE_OPENAI",
+            litellm_provider="azure",
            model_name="my-azure-deployment",
            base_model="gpt-4o",
        )
@@ -49,7 +49,7 @@ def test_safety_net_does_not_fire_for_unknown_model():
    LiteLLM doesn't know about must flow through to the provider."""
    assert (
        is_known_text_only_chat_model(
-            provider="CUSTOM",
+            litellm_provider="custom",
            custom_provider="brand_new_proxy",
            model_name="brand-new-model-x9",
        )
@@ -69,7 +69,7 @@ def test_safety_net_does_not_fire_when_lookup_raises(monkeypatch):

    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            litellm_provider="openai",
            model_name="gpt-4o",
        )
        is False
@@ -88,7 +88,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
    monkeypatch.setattr(pc.litellm, "get_model_info", _info_explicit_false)
    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            litellm_provider="openai",
            model_name="text-only-stub",
        )
        is True
@@ -100,7 +100,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
    monkeypatch.setattr(pc.litellm, "get_model_info", _info_true)
    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            litellm_provider="openai",
            model_name="vision-stub",
        )
        is False
@@ -112,7 +112,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
    monkeypatch.setattr(pc.litellm, "get_model_info", _info_missing)
    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            litellm_provider="openai",
            model_name="missing-key-stub",
        )
        is False