Merge remote-tracking branch 'upstream/dev' into features/documents-injestion-layered-cached

2026-06-30 21:59:46 +02:00 · 2026-06-14 11:30:33 +02:00 · 2026-06-14 11:30:33 +02:00 · 32a6e54ce6
commit 32a6e54ce6
parent dcebfc4756 7654e909cf
215 changed files with 9532 additions and 15405 deletions
--- a/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml
+++ b/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml
@ -19,7 +19,7 @@
 #   so the resolved auto-pin id is never sent to a real LLM provider.
 #   The values below only need to pass
 #   auto_model_pin_service._is_usable_global_config()
-#   which requires id / model_name / provider / api_key all truthy.
+#   which requires id / model_name / litellm_provider / api_key all truthy.
 #
 # Why TWO entries (premium + free):
 #   auto_model_pin_service.resolve_or_get_pinned_llm_config_id() splits
@ -44,9 +44,10 @@ global_llm_configs:
    anonymous_enabled: false
    seo_enabled: false
    quality_score: 1.0
-    provider: "OPENAI"
+    litellm_provider: "openai"
    model_name: "fake-e2e-model-premium"
    api_key: "fake-e2e-api-key-not-for-production"
+    api_base: "https://api.openai.com/v1"
    supports_image_input: false
    quota_reserve_tokens: 1024
    rpm: 1000
@ -60,9 +61,10 @@ global_llm_configs:
    anonymous_enabled: false
    seo_enabled: false
    quality_score: 1.0
-    provider: "OPENAI"
+    litellm_provider: "openai"
    model_name: "fake-e2e-model-free"
    api_key: "fake-e2e-api-key-not-for-production"
+    api_base: "https://api.openai.com/v1"
    supports_image_input: false
    quota_reserve_tokens: 1024
    rpm: 1000
--- a/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
@ -0,0 +1,39 @@
+"""Regression tests for model-boundary message sanitization."""
+
+from __future__ import annotations
+
+import pytest
+from langchain_core.messages import AIMessage
+
+from app.agents.chat.runtime.llm_config import _sanitize_messages
+
+pytestmark = pytest.mark.unit
+
+
+def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
+    original = AIMessage(
+        content=[
+            {"type": "thinking", "thinking": "private reasoning"},
+            {"type": "text", "text": "visible answer"},
+        ]
+    )
+
+    sanitized = _sanitize_messages([original])
+
+    assert sanitized[0].content == "visible answer"
+    assert original.content == [
+        {"type": "thinking", "thinking": "private reasoning"},
+        {"type": "text", "text": "visible answer"},
+    ]
+
+
+def test_sanitize_messages_sets_tool_only_ai_content_to_none() -> None:
+    message = AIMessage(
+        content="",
+        tool_calls=[{"name": "search", "args": {"q": "x"}, "id": "call_1"}],
+    )
+
+    sanitized = _sanitize_messages([message])
+
+    assert sanitized[0].content is None
+    assert message.content == ""
--- a/surfsense_backend/tests/unit/automations/actions/builtin/agent_task/test_dependencies.py
+++ b/surfsense_backend/tests/unit/automations/actions/builtin/agent_task/test_dependencies.py
@ -1,6 +1,6 @@
 """Lock the runtime model-policy backstop in ``build_dependencies``.

-Automations resolve their LLM from the *captured* ``agent_llm_id`` snapshot (so
+Automations resolve their LLM from the *captured* ``chat_model_id`` snapshot (so
 runs are insulated from later chat/search-space model changes), and the model
 policy is re-checked at run time so a captured model that is no longer billable
 fails the run clearly. When no snapshot is present, resolution falls back to the
@ -45,10 +45,10 @@ def patched_side_effects(monkeypatch: pytest.MonkeyPatch):
    return None


-async def test_build_dependencies_resolves_captured_agent_llm_id(
+async def test_build_dependencies_resolves_captured_chat_model_id(
    monkeypatch: pytest.MonkeyPatch, patched_side_effects
 ) -> None:
-    """The bundle loads with the *captured* ``agent_llm_id``, not the live search space."""
+    """The bundle loads with the *captured* ``chat_model_id``, not the live search space."""
    captured: dict[str, Any] = {}

    async def _fake_load(_session, *, config_id, search_space_id):
@ -67,13 +67,13 @@ async def test_build_dependencies_resolves_captured_agent_llm_id(
        lambda _ss: pytest.fail("search-space policy should not run on captured path"),
    )

-    search_space = SimpleNamespace(agent_llm_id=-99)
+    search_space = SimpleNamespace(chat_model_id=-99)
    result = await build_dependencies(
        session=_FakeSession(search_space),
        search_space_id=42,
-        agent_llm_id=-7,
-        image_generation_config_id=5,
-        vision_llm_config_id=-1,
+        chat_model_id=-7,
+        image_gen_model_id=5,
+        vision_model_id=-1,
    )

    assert captured == {"config_id": -7, "search_space_id": 42}
@ -98,17 +98,17 @@ async def test_build_dependencies_validates_captured_ids(
    monkeypatch.setattr(deps_mod, "load_llm_bundle", _fake_load)

    await build_dependencies(
-        session=_FakeSession(SimpleNamespace(agent_llm_id=0)),
+        session=_FakeSession(SimpleNamespace(chat_model_id=0)),
        search_space_id=42,
-        agent_llm_id=-7,
-        image_generation_config_id=5,
-        vision_llm_config_id=-1,
+        chat_model_id=-7,
+        image_gen_model_id=5,
+        vision_model_id=-1,
    )

    assert seen == {
-        "agent_llm_id": -7,
-        "image_generation_config_id": 5,
-        "vision_llm_config_id": -1,
+        "chat_model_id": -7,
+        "image_gen_model_id": 5,
+        "vision_model_id": -1,
    }


@ -119,7 +119,7 @@ async def test_build_dependencies_raises_on_captured_policy_violation(

    def _raise(**_kw):
        raise AutomationModelPolicyError(
-            [{"kind": "image", "config_id": -2, "reason": "free model"}]
+            [{"kind": "image", "model_id": -2, "reason": "free model"}]
        )

    monkeypatch.setattr(deps_mod, "assert_models_billable", _raise)
@ -131,11 +131,11 @@ async def test_build_dependencies_raises_on_captured_policy_violation(

    with pytest.raises(DependencyError):
        await build_dependencies(
-            session=_FakeSession(SimpleNamespace(agent_llm_id=-7)),
+            session=_FakeSession(SimpleNamespace(chat_model_id=-7)),
            search_space_id=42,
-            agent_llm_id=-7,
-            image_generation_config_id=-2,
-            vision_llm_config_id=-1,
+            chat_model_id=-7,
+            image_gen_model_id=-2,
+            vision_model_id=-1,
        )


@ -157,7 +157,7 @@ async def test_build_dependencies_falls_back_to_search_space(
        lambda **_kw: pytest.fail("captured policy should not run on fallback path"),
    )

-    search_space = SimpleNamespace(agent_llm_id=-7)
+    search_space = SimpleNamespace(chat_model_id=-7)
    result = await build_dependencies(
        session=_FakeSession(search_space), search_space_id=42
    )
--- a/surfsense_backend/tests/unit/automations/runtime/test_executor_action_ctx.py
+++ b/surfsense_backend/tests/unit/automations/runtime/test_executor_action_ctx.py
@ -28,9 +28,9 @@ def _run() -> SimpleNamespace:
 def test_build_action_ctx_propagates_captured_models() -> None:
    """``definition.models`` flows onto the ActionContext model fields."""
    models = AutomationModels(
-        agent_llm_id=-1,
-        image_generation_config_id=5,
-        vision_llm_config_id=-1,
+        chat_model_id=-1,
+        image_gen_model_id=5,
+        vision_model_id=-1,
    )
    ctx = _build_action_ctx(
        cast(AsyncSession, None),
@ -40,9 +40,9 @@ def test_build_action_ctx_propagates_captured_models() -> None:
    )

    assert ctx.search_space_id == 42
-    assert ctx.agent_llm_id == -1
-    assert ctx.image_generation_config_id == 5
-    assert ctx.vision_llm_config_id == -1
+    assert ctx.chat_model_id == -1
+    assert ctx.image_gen_model_id == 5
+    assert ctx.vision_model_id == -1


 def test_build_action_ctx_none_models_leaves_fields_none() -> None:
@ -54,6 +54,6 @@ def test_build_action_ctx_none_models_leaves_fields_none() -> None:
        None,
    )

-    assert ctx.agent_llm_id is None
-    assert ctx.image_generation_config_id is None
-    assert ctx.vision_llm_config_id is None
+    assert ctx.chat_model_id is None
+    assert ctx.image_gen_model_id is None
+    assert ctx.vision_model_id is None
--- a/surfsense_backend/tests/unit/automations/schemas/definition/test_envelope.py
+++ b/surfsense_backend/tests/unit/automations/schemas/definition/test_envelope.py
@ -40,24 +40,24 @@ def test_automation_definition_models_round_trip() -> None:
        name="Daily digest",
        plan=[PlanStep(step_id="s1", action="agent_task")],
        models=AutomationModels(
-            agent_llm_id=-1,
-            image_generation_config_id=5,
-            vision_llm_config_id=-1,
+            chat_model_id=-1,
+            image_gen_model_id=5,
+            vision_model_id=-1,
        ),
    )

    dumped = definition.model_dump(mode="json", by_alias=True)
    assert dumped["models"] == {
-        "agent_llm_id": -1,
-        "image_generation_config_id": 5,
-        "vision_llm_config_id": -1,
+        "chat_model_id": -1,
+        "image_gen_model_id": 5,
+        "vision_model_id": -1,
    }

    restored = AutomationDefinition.model_validate(dumped)
    assert restored.models is not None
-    assert restored.models.agent_llm_id == -1
-    assert restored.models.image_generation_config_id == 5
-    assert restored.models.vision_llm_config_id == -1
+    assert restored.models.chat_model_id == -1
+    assert restored.models.image_gen_model_id == 5
+    assert restored.models.vision_model_id == -1


 def test_automation_definition_rejects_unknown_top_level_field() -> None:
--- a/surfsense_backend/tests/unit/automations/services/test_automation_service_policy.py
+++ b/surfsense_backend/tests/unit/automations/services/test_automation_service_policy.py
@ -64,12 +64,12 @@ async def test_assert_models_billable_raises_422_on_violation(

    def _raise(_ss):
        raise AutomationModelPolicyError(
-            [{"kind": "llm", "config_id": 0, "reason": "Auto mode"}]
+            [{"kind": "llm", "model_id": 0, "reason": "Auto mode"}]
        )

    monkeypatch.setattr(automation_mod, "assert_automation_models_billable", _raise)

-    service = _service(SimpleNamespace(agent_llm_id=0))
+    service = _service(SimpleNamespace(chat_model_id=0))
    with pytest.raises(HTTPException) as exc_info:
        await service._assert_models_billable(1)

@ -99,7 +99,7 @@ async def test_assert_models_billable_returns_search_space_when_ok(
        automation_mod, "assert_automation_models_billable", lambda _ss: None
    )

-    search_space = SimpleNamespace(agent_llm_id=-1)
+    search_space = SimpleNamespace(chat_model_id=-1)
    service = _service(search_space)
    assert await service._assert_models_billable(1) is search_space

@ -123,9 +123,9 @@ async def test_create_injects_captured_models_from_search_space(
    monkeypatch.setattr(AutomationService, "_get_with_triggers_or_raise", _return_added)

    search_space = SimpleNamespace(
-        agent_llm_id=-1,
-        image_generation_config_id=5,
-        vision_llm_config_id=-1,
+        chat_model_id=-1,
+        image_gen_model_id=5,
+        vision_model_id=-1,
    )
    service = _service(search_space)
    payload = AutomationCreate(
@ -137,9 +137,9 @@ async def test_create_injects_captured_models_from_search_space(
    automation = await service.create(payload)

    assert automation.definition["models"] == {
-        "agent_llm_id": -1,
-        "image_generation_config_id": 5,
-        "vision_llm_config_id": -1,
+        "chat_model_id": -1,
+        "image_gen_model_id": 5,
+        "vision_model_id": -1,
    }


@ -162,9 +162,9 @@ async def test_create_treats_unset_prefs_as_auto_zero(
    monkeypatch.setattr(AutomationService, "_get_with_triggers_or_raise", _return_added)

    search_space = SimpleNamespace(
-        agent_llm_id=None,
-        image_generation_config_id=None,
-        vision_llm_config_id=None,
+        chat_model_id=None,
+        image_gen_model_id=None,
+        vision_model_id=None,
    )
    service = _service(search_space)
    payload = AutomationCreate(search_space_id=1, name="A", definition=_definition())
@ -172,9 +172,9 @@ async def test_create_treats_unset_prefs_as_auto_zero(
    automation = await service.create(payload)

    assert automation.definition["models"] == {
-        "agent_llm_id": 0,
-        "image_generation_config_id": 0,
-        "vision_llm_config_id": 0,
+        "chat_model_id": 0,
+        "image_gen_model_id": 0,
+        "vision_model_id": 0,
    }


@ -195,11 +195,11 @@ async def test_create_honors_selected_models_when_provided(
    )
    validated: dict[str, Any] = {}

-    def _assert_ok(*, agent_llm_id, image_generation_config_id, vision_llm_config_id):
+    def _assert_ok(*, chat_model_id, image_gen_model_id, vision_model_id):
        validated["ids"] = (
-            agent_llm_id,
-            image_generation_config_id,
-            vision_llm_config_id,
+            chat_model_id,
+            image_gen_model_id,
+            vision_model_id,
        )

    monkeypatch.setattr(automation_mod, "assert_models_billable", _assert_ok)
@ -213,15 +213,15 @@ async def test_create_honors_selected_models_when_provided(
    monkeypatch.setattr(AutomationService, "_authorize", _noop_authorize)
    monkeypatch.setattr(AutomationService, "_get_with_triggers_or_raise", _return_added)

-    service = _service(SimpleNamespace(agent_llm_id=-99))
+    service = _service(SimpleNamespace(chat_model_id=-99))
    payload = AutomationCreate(
        search_space_id=1,
        name="A",
        definition=_definition(
            models=AutomationModels(
-                agent_llm_id=-1,
-                image_generation_config_id=7,
-                vision_llm_config_id=-2,
+                chat_model_id=-1,
+                image_gen_model_id=7,
+                vision_model_id=-2,
            )
        ),
    )
@ -230,9 +230,9 @@ async def test_create_honors_selected_models_when_provided(

    assert validated["ids"] == (-1, 7, -2)
    assert automation.definition["models"] == {
-        "agent_llm_id": -1,
-        "image_generation_config_id": 7,
-        "vision_llm_config_id": -2,
+        "chat_model_id": -1,
+        "image_gen_model_id": 7,
+        "vision_model_id": -2,
    }


@ -241,9 +241,9 @@ async def test_create_rejects_unbillable_selected_models(
 ) -> None:
    """A non-billable explicit selection maps the policy error to HTTP 422."""

-    def _raise(*, agent_llm_id, image_generation_config_id, vision_llm_config_id):
+    def _raise(*, chat_model_id, image_gen_model_id, vision_model_id):
        raise AutomationModelPolicyError(
-            [{"kind": "llm", "config_id": -3, "reason": "free model"}]
+            [{"kind": "llm", "model_id": -3, "reason": "free model"}]
        )

    monkeypatch.setattr(automation_mod, "assert_models_billable", _raise)
@ -253,15 +253,15 @@ async def test_create_rejects_unbillable_selected_models(

    monkeypatch.setattr(AutomationService, "_authorize", _noop_authorize)

-    service = _service(SimpleNamespace(agent_llm_id=-3))
+    service = _service(SimpleNamespace(chat_model_id=-3))
    payload = AutomationCreate(
        search_space_id=1,
        name="A",
        definition=_definition(
            models=AutomationModels(
-                agent_llm_id=-3,
-                image_generation_config_id=7,
-                vision_llm_config_id=-2,
+                chat_model_id=-3,
+                image_gen_model_id=7,
+                vision_model_id=-2,
            )
        ),
    )
@ -277,9 +277,9 @@ async def test_update_preserves_captured_models(
 ) -> None:
    """A definition edit carries over the previously captured ``models``."""
    captured = {
-        "agent_llm_id": -1,
-        "image_generation_config_id": 5,
-        "vision_llm_config_id": -1,
+        "chat_model_id": -1,
+        "image_gen_model_id": 5,
+        "vision_model_id": -1,
    }
    existing = SimpleNamespace(
        search_space_id=1,
@ -318,20 +318,20 @@ async def test_update_honors_changed_models_when_valid(
            "name": "A",
            "plan": [],
            "models": {
-                "agent_llm_id": -1,
-                "image_generation_config_id": 5,
-                "vision_llm_config_id": -1,
+                "chat_model_id": -1,
+                "image_gen_model_id": 5,
+                "vision_model_id": -1,
            },
        },
        version=3,
    )
    validated: dict[str, Any] = {}

-    def _assert_ok(*, agent_llm_id, image_generation_config_id, vision_llm_config_id):
+    def _assert_ok(*, chat_model_id, image_gen_model_id, vision_model_id):
        validated["ids"] = (
-            agent_llm_id,
-            image_generation_config_id,
-            vision_llm_config_id,
+            chat_model_id,
+            image_gen_model_id,
+            vision_model_id,
        )

    monkeypatch.setattr(automation_mod, "assert_models_billable", _assert_ok)
@ -351,9 +351,9 @@ async def test_update_honors_changed_models_when_valid(
    patch = AutomationUpdate(
        definition=_definition(
            models=AutomationModels(
-                agent_llm_id=-2,
-                image_generation_config_id=9,
-                vision_llm_config_id=-2,
+                chat_model_id=-2,
+                image_gen_model_id=9,
+                vision_model_id=-2,
            )
        )
    )
@ -362,9 +362,9 @@ async def test_update_honors_changed_models_when_valid(

    assert validated["ids"] == (-2, 9, -2)
    assert result.definition["models"] == {
-        "agent_llm_id": -2,
-        "image_generation_config_id": 9,
-        "vision_llm_config_id": -2,
+        "chat_model_id": -2,
+        "image_gen_model_id": 9,
+        "vision_model_id": -2,
    }
    assert result.version == 4

@ -379,17 +379,17 @@ async def test_update_rejects_changed_unbillable_models(
            "name": "A",
            "plan": [],
            "models": {
-                "agent_llm_id": -1,
-                "image_generation_config_id": 5,
-                "vision_llm_config_id": -1,
+                "chat_model_id": -1,
+                "image_gen_model_id": 5,
+                "vision_model_id": -1,
            },
        },
        version=3,
    )

-    def _raise(*, agent_llm_id, image_generation_config_id, vision_llm_config_id):
+    def _raise(*, chat_model_id, image_gen_model_id, vision_model_id):
        raise AutomationModelPolicyError(
-            [{"kind": "llm", "config_id": -7, "reason": "free model"}]
+            [{"kind": "llm", "model_id": -7, "reason": "free model"}]
        )

    monkeypatch.setattr(automation_mod, "assert_models_billable", _raise)
@ -409,9 +409,9 @@ async def test_update_rejects_changed_unbillable_models(
    patch = AutomationUpdate(
        definition=_definition(
            models=AutomationModels(
-                agent_llm_id=-7,
-                image_generation_config_id=5,
-                vision_llm_config_id=-1,
+                chat_model_id=-7,
+                image_gen_model_id=5,
+                vision_model_id=-1,
            )
        )
    )
@ -431,9 +431,9 @@ async def test_update_keeps_unchanged_models_without_revalidation(
    premium without an unrelated edit tripping the policy check.
    """
    captured = {
-        "agent_llm_id": -1,
-        "image_generation_config_id": 5,
-        "vision_llm_config_id": -1,
+        "chat_model_id": -1,
+        "image_gen_model_id": 5,
+        "vision_model_id": -1,
    }
    existing = SimpleNamespace(
        search_space_id=1,
@ -485,7 +485,7 @@ async def test_model_eligibility_authorizes_and_returns_payload(
        lambda _ss: {"allowed": False, "violations": [{"kind": "image"}]},
    )

-    service = _service(SimpleNamespace(agent_llm_id=-2))
+    service = _service(SimpleNamespace(chat_model_id=-2))
    result = await service.model_eligibility(search_space_id=5)

    assert result == {"allowed": False, "violations": [{"kind": "image"}]}
--- a/surfsense_backend/tests/unit/automations/services/test_model_policy.py
+++ b/surfsense_backend/tests/unit/automations/services/test_model_policy.py
@ -27,9 +27,9 @@ pytestmark = pytest.mark.unit
 def _search_space(*, llm: int | None, image: int | None, vision: int | None):
    """Minimal stand-in for the ``SearchSpace`` ORM row the policy reads."""
    return SimpleNamespace(
-        agent_llm_id=llm,
-        image_generation_config_id=image,
-        vision_llm_config_id=vision,
+        chat_model_id=llm,
+        image_gen_model_id=image,
+        vision_model_id=vision,
    )


@ -39,29 +39,11 @@ def patched_globals(monkeypatch: pytest.MonkeyPatch):

    Negative ids: -1 is premium, -2 is free, for each of llm/image/vision.
    """
-    llm_configs = {
-        -1: {"id": -1, "billing_tier": "premium"},
-        -2: {"id": -2, "billing_tier": "free"},
-    }
-    monkeypatch.setattr(
-        "app.agents.chat.runtime.llm_config.load_global_llm_config_by_id",
-        lambda cid: llm_configs.get(cid),
-    )
-
    from app.config import config as app_config

    monkeypatch.setattr(
        app_config,
-        "GLOBAL_IMAGE_GEN_CONFIGS",
-        [
-            {"id": -1, "billing_tier": "premium"},
-            {"id": -2, "billing_tier": "free"},
-        ],
-        raising=False,
-    )
-    monkeypatch.setattr(
-        app_config,
-        "GLOBAL_VISION_LLM_CONFIGS",
+        "GLOBAL_MODELS",
        [
            {"id": -1, "billing_tier": "premium"},
            {"id": -2, "billing_tier": "free"},
@ -71,7 +53,7 @@ def patched_globals(monkeypatch: pytest.MonkeyPatch):
    return None


-@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
+@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
 def test_byok_positive_id_is_allowed(kind: str, patched_globals) -> None:
    """A positive config id is a user-owned BYOK model — always billable."""
    allowed, reason = model_policy._classify(kind, 7)
@ -79,7 +61,7 @@ def test_byok_positive_id_is_allowed(kind: str, patched_globals) -> None:
    assert reason == ""


-@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
+@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
 @pytest.mark.parametrize("config_id", [0, None])
 def test_auto_mode_is_blocked(kind: str, config_id, patched_globals) -> None:
    """Auto mode (id 0) and an unset slot (None) are blocked."""
@ -88,7 +70,7 @@ def test_auto_mode_is_blocked(kind: str, config_id, patched_globals) -> None:
    assert "Auto mode" in reason


-@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
+@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
 def test_premium_global_is_allowed(kind: str, patched_globals) -> None:
    """A negative (global) id with premium billing tier is allowed."""
    allowed, reason = model_policy._classify(kind, -1)
@ -96,7 +78,7 @@ def test_premium_global_is_allowed(kind: str, patched_globals) -> None:
    assert reason == ""


-@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
+@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
 def test_free_global_is_blocked(kind: str, patched_globals) -> None:
    """A negative (global) id with a free billing tier is blocked."""
    allowed, reason = model_policy._classify(kind, -2)
@ -104,7 +86,7 @@ def test_free_global_is_blocked(kind: str, patched_globals) -> None:
    assert "free model" in reason


-@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
+@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
 def test_unknown_global_id_is_blocked(kind: str, patched_globals) -> None:
    """A negative id that resolves to no config is treated as not premium."""
    allowed, _ = model_policy._classify(kind, -999)
@ -125,10 +107,10 @@ def test_eligibility_reports_each_violation(patched_globals) -> None:

    assert result["allowed"] is False
    kinds = {v["kind"] for v in result["violations"]}
-    assert kinds == {"llm", "image", "vision"}
-    # config_id is echoed back for the UI / settings deep-link.
-    by_kind = {v["kind"]: v["config_id"] for v in result["violations"]}
-    assert by_kind == {"llm": -2, "image": 0, "vision": -2}
+    assert kinds == {"chat", "image", "vision"}
+    # model_id is echoed back for the UI / settings deep-link.
+    by_kind = {v["kind"]: v["model_id"] for v in result["violations"]}
+    assert by_kind == {"chat": -2, "image": 0, "vision": -2}


 def test_assert_raises_with_violations(patched_globals) -> None:
@ -138,7 +120,7 @@ def test_assert_raises_with_violations(patched_globals) -> None:
        assert_automation_models_billable(search_space)

    assert len(exc_info.value.violations) == 1
-    assert exc_info.value.violations[0]["kind"] == "llm"
+    assert exc_info.value.violations[0]["kind"] == "chat"


 def test_assert_passes_when_all_billable(patched_globals) -> None:
@ -153,7 +135,7 @@ def test_assert_passes_when_all_billable(patched_globals) -> None:
 def test_get_model_eligibility_all_billable(patched_globals) -> None:
    """Premium LLM + BYOK image + premium vision (explicit ids) → allowed."""
    result = get_model_eligibility(
-        agent_llm_id=-1, image_generation_config_id=5, vision_llm_config_id=-1
+        chat_model_id=-1, image_gen_model_id=5, vision_model_id=-1
    )
    assert result == {"allowed": True, "violations": []}

@ -161,28 +143,28 @@ def test_get_model_eligibility_all_billable(patched_globals) -> None:
 def test_get_model_eligibility_reports_each_violation(patched_globals) -> None:
    """Free LLM, Auto image, free vision (explicit ids) each produce a violation."""
    result = get_model_eligibility(
-        agent_llm_id=-2, image_generation_config_id=0, vision_llm_config_id=-2
+        chat_model_id=-2, image_gen_model_id=0, vision_model_id=-2
    )
    assert result["allowed"] is False
-    by_kind = {v["kind"]: v["config_id"] for v in result["violations"]}
-    assert by_kind == {"llm": -2, "image": 0, "vision": -2}
+    by_kind = {v["kind"]: v["model_id"] for v in result["violations"]}
+    assert by_kind == {"chat": -2, "image": 0, "vision": -2}


 def test_assert_models_billable_raises(patched_globals) -> None:
    """``assert_models_billable`` raises when any explicit id is blocked."""
    with pytest.raises(AutomationModelPolicyError) as exc_info:
        assert_models_billable(
-            agent_llm_id=0, image_generation_config_id=5, vision_llm_config_id=-1
+            chat_model_id=0, image_gen_model_id=5, vision_model_id=-1
        )
    assert len(exc_info.value.violations) == 1
-    assert exc_info.value.violations[0]["kind"] == "llm"
+    assert exc_info.value.violations[0]["kind"] == "chat"


 def test_assert_models_billable_passes(patched_globals) -> None:
    """No exception when every explicit id is premium or BYOK."""
    assert (
        assert_models_billable(
-            agent_llm_id=3, image_generation_config_id=-1, vision_llm_config_id=4
+            chat_model_id=3, image_gen_model_id=-1, vision_model_id=4
        )
        is None
    )
@ -192,5 +174,5 @@ def test_search_space_wrapper_delegates_to_core(patched_globals) -> None:
    """The search-space wrapper produces the same result as the ID core."""
    search_space = _search_space(llm=-2, image=0, vision=-2)
    assert get_automation_model_eligibility(search_space) == get_model_eligibility(
-        agent_llm_id=-2, image_generation_config_id=0, vision_llm_config_id=-2
+        chat_model_id=-2, image_gen_model_id=0, vision_model_id=-2
    )
--- a/surfsense_backend/tests/unit/routes/test_byok_supports_image_input.py
+++ b/surfsense_backend/tests/unit/routes/test_byok_supports_image_input.py
@ -1,110 +0,0 @@
-"""Unit tests for ``supports_image_input`` derivation on BYOK chat config
-endpoints (``GET /new-llm-configs`` list, ``GET /new-llm-configs/{id}``).
-
-There is no DB column for ``supports_image_input`` on
-``NewLLMConfig`` — the value is resolved at the API boundary by
-``derive_supports_image_input`` so the new-chat selector / streaming
-task can read the same field shape regardless of source (BYOK vs YAML
-vs OpenRouter dynamic). Default-allow on unknown so we don't lock the
-user out of their own model choice.
-"""
-
-from __future__ import annotations
-
-from datetime import UTC, datetime
-from types import SimpleNamespace
-from uuid import uuid4
-
-import pytest
-
-from app.db import LiteLLMProvider
-from app.routes import new_llm_config_routes
-
-pytestmark = pytest.mark.unit
-
-
-def _byok_row(
-    *,
-    id_: int,
-    model_name: str,
-    base_model: str | None = None,
-    provider: LiteLLMProvider = LiteLLMProvider.OPENAI,
-    custom_provider: str | None = None,
-) -> object:
-    """Mimic the SQLAlchemy row's attribute surface; ``model_validate``
-    walks ``from_attributes=True`` so a ``SimpleNamespace`` is enough.
-
-    ``provider`` is a real ``LiteLLMProvider`` enum value so Pydantic's
-    enum validator accepts it — same as the ORM row would carry."""
-    return SimpleNamespace(
-        id=id_,
-        name=f"BYOK-{id_}",
-        description=None,
-        provider=provider,
-        custom_provider=custom_provider,
-        model_name=model_name,
-        api_key="sk-byok",
-        api_base=None,
-        litellm_params={"base_model": base_model} if base_model else None,
-        system_instructions="",
-        use_default_system_instructions=True,
-        citations_enabled=True,
-        created_at=datetime.now(tz=UTC),
-        search_space_id=42,
-        user_id=uuid4(),
-    )
-
-
-def test_serialize_byok_known_vision_model_resolves_true():
-    """The catalog resolver consults LiteLLM's map for ``gpt-4o`` ->
-    True. The serialized row carries that value through to the
-    ``NewLLMConfigRead`` schema."""
-    row = _byok_row(id_=1, model_name="gpt-4o")
-    serialized = new_llm_config_routes._serialize_byok_config(row)
-
-    assert serialized.supports_image_input is True
-    assert serialized.id == 1
-    assert serialized.model_name == "gpt-4o"
-
-
-def test_serialize_byok_unknown_model_default_allows():
-    """Unknown / unmapped: default-allow. The streaming-task safety net
-    is the actual block, and it requires LiteLLM to *explicitly* say
-    text-only — so a brand new BYOK model should not be pre-judged."""
-    row = _byok_row(
-        id_=2,
-        model_name="brand-new-model-x9-unmapped",
-        provider=LiteLLMProvider.CUSTOM,
-        custom_provider="brand_new_proxy",
-    )
-    serialized = new_llm_config_routes._serialize_byok_config(row)
-
-    assert serialized.supports_image_input is True
-
-
-def test_serialize_byok_uses_base_model_when_present():
-    """Azure-style: ``model_name`` is the deployment id, ``base_model``
-    inside ``litellm_params`` is the canonical sku LiteLLM knows. The
-    helper must consult ``base_model`` first or unrecognised deployment
-    ids would shadow the real capability."""
-    row = _byok_row(
-        id_=3,
-        model_name="my-azure-deployment-id-no-litellm-knows-this",
-        base_model="gpt-4o",
-        provider=LiteLLMProvider.AZURE_OPENAI,
-    )
-    serialized = new_llm_config_routes._serialize_byok_config(row)
-
-    assert serialized.supports_image_input is True
-
-
-def test_serialize_byok_returns_pydantic_read_model():
-    """The route now returns ``NewLLMConfigRead`` (not the raw ORM) so
-    the schema additions are guaranteed to be present in the API
-    surface. This guards against a future regression where someone
-    deletes the augmentation step and falls back to ORM passthrough."""
-    from app.schemas import NewLLMConfigRead
-
-    row = _byok_row(id_=4, model_name="gpt-4o")
-    serialized = new_llm_config_routes._serialize_byok_config(row)
-    assert isinstance(serialized, NewLLMConfigRead)
--- a/surfsense_backend/tests/unit/routes/test_global_configs_is_premium.py
+++ b/surfsense_backend/tests/unit/routes/test_global_configs_is_premium.py
@ -1,184 +0,0 @@
-"""Unit tests for ``is_premium`` derivation on the global image-gen and
-vision-LLM list endpoints.
-
-Chat globals (``GET /global-llm-configs``) already emit
-``is_premium = (billing_tier == "premium")``. Image and vision did not,
-which made the new-chat ``model-selector`` render the Free/Premium badge
-on the Chat tab but skip it on the Image and Vision tabs (the selector
-keys its badge logic off ``is_premium``). These tests pin parity:
-
-* YAML free entry → ``is_premium=False``
-* YAML premium entry → ``is_premium=True``
-* OpenRouter dynamic premium entry → ``is_premium=True``
-* Auto stub (always emitted when at least one config is present)
-  → ``is_premium=False``
-"""
-
-from __future__ import annotations
-
-import pytest
-
-pytestmark = pytest.mark.unit
-
-
-_IMAGE_FIXTURE: list[dict] = [
-    {
-        "id": -1,
-        "name": "DALL-E 3",
-        "provider": "OPENAI",
-        "model_name": "dall-e-3",
-        "api_key": "sk-test",
-        "billing_tier": "free",
-    },
-    {
-        "id": -2,
-        "name": "GPT-Image 1 (premium)",
-        "provider": "OPENAI",
-        "model_name": "gpt-image-1",
-        "api_key": "sk-test",
-        "billing_tier": "premium",
-    },
-    {
-        "id": -20_001,
-        "name": "google/gemini-2.5-flash-image (OpenRouter)",
-        "provider": "OPENROUTER",
-        "model_name": "google/gemini-2.5-flash-image",
-        "api_key": "sk-or-test",
-        "api_base": "https://openrouter.ai/api/v1",
-        "billing_tier": "premium",
-    },
-]
-
-
-_VISION_FIXTURE: list[dict] = [
-    {
-        "id": -1,
-        "name": "GPT-4o Vision",
-        "provider": "OPENAI",
-        "model_name": "gpt-4o",
-        "api_key": "sk-test",
-        "billing_tier": "free",
-    },
-    {
-        "id": -2,
-        "name": "Claude 3.5 Sonnet (premium)",
-        "provider": "ANTHROPIC",
-        "model_name": "claude-3-5-sonnet",
-        "api_key": "sk-ant-test",
-        "billing_tier": "premium",
-    },
-    {
-        "id": -30_001,
-        "name": "openai/gpt-4o (OpenRouter)",
-        "provider": "OPENROUTER",
-        "model_name": "openai/gpt-4o",
-        "api_key": "sk-or-test",
-        "api_base": "https://openrouter.ai/api/v1",
-        "billing_tier": "premium",
-    },
-]
-
-
-# =============================================================================
-# Image generation
-# =============================================================================
-
-
-@pytest.mark.asyncio
-async def test_global_image_gen_configs_emit_is_premium(monkeypatch):
-    """Each emitted config must carry ``is_premium`` derived server-side
-    from ``billing_tier``. The Auto stub is always free.
-    """
-    from app.config import config
-    from app.routes import image_generation_routes
-
-    monkeypatch.setattr(
-        config, "GLOBAL_IMAGE_GEN_CONFIGS", _IMAGE_FIXTURE, raising=False
-    )
-
-    payload = await image_generation_routes.get_global_image_gen_configs(user=None)
-
-    by_id = {c["id"]: c for c in payload}
-
-    # Auto stub is always emitted when at least one global config exists,
-    # and it must always declare itself free (Auto-mode billing-tier
-    # surfacing is a separate follow-up).
-    assert 0 in by_id, "Auto stub should be emitted when at least one config exists"
-    assert by_id[0]["is_premium"] is False
-    assert by_id[0]["billing_tier"] == "free"
-
-    # YAML free entry — ``is_premium=False``
-    assert by_id[-1]["is_premium"] is False
-    assert by_id[-1]["billing_tier"] == "free"
-
-    # YAML premium entry — ``is_premium=True``
-    assert by_id[-2]["is_premium"] is True
-    assert by_id[-2]["billing_tier"] == "premium"
-
-    # OpenRouter dynamic premium entry — same field, same derivation
-    assert by_id[-20_001]["is_premium"] is True
-    assert by_id[-20_001]["billing_tier"] == "premium"
-
-    # Every emitted dict (including Auto) must have the field — never missing.
-    for cfg in payload:
-        assert "is_premium" in cfg, f"is_premium missing from {cfg.get('id')}"
-        assert isinstance(cfg["is_premium"], bool)
-
-
-@pytest.mark.asyncio
-async def test_global_image_gen_configs_no_globals_no_auto_stub(monkeypatch):
-    """When there are no global configs at all, the endpoint emits an
-    empty list (no Auto stub) — Auto mode would have nothing to route to.
-    """
-    from app.config import config
-    from app.routes import image_generation_routes
-
-    monkeypatch.setattr(config, "GLOBAL_IMAGE_GEN_CONFIGS", [], raising=False)
-    payload = await image_generation_routes.get_global_image_gen_configs(user=None)
-    assert payload == []
-
-
-# =============================================================================
-# Vision LLM
-# =============================================================================
-
-
-@pytest.mark.asyncio
-async def test_global_vision_llm_configs_emit_is_premium(monkeypatch):
-    from app.config import config
-    from app.routes import vision_llm_routes
-
-    monkeypatch.setattr(
-        config, "GLOBAL_VISION_LLM_CONFIGS", _VISION_FIXTURE, raising=False
-    )
-
-    payload = await vision_llm_routes.get_global_vision_llm_configs(user=None)
-
-    by_id = {c["id"]: c for c in payload}
-
-    assert 0 in by_id, "Auto stub should be emitted when at least one config exists"
-    assert by_id[0]["is_premium"] is False
-    assert by_id[0]["billing_tier"] == "free"
-
-    assert by_id[-1]["is_premium"] is False
-    assert by_id[-1]["billing_tier"] == "free"
-
-    assert by_id[-2]["is_premium"] is True
-    assert by_id[-2]["billing_tier"] == "premium"
-
-    assert by_id[-30_001]["is_premium"] is True
-    assert by_id[-30_001]["billing_tier"] == "premium"
-
-    for cfg in payload:
-        assert "is_premium" in cfg, f"is_premium missing from {cfg.get('id')}"
-        assert isinstance(cfg["is_premium"], bool)
-
-
-@pytest.mark.asyncio
-async def test_global_vision_llm_configs_no_globals_no_auto_stub(monkeypatch):
-    from app.config import config
-    from app.routes import vision_llm_routes
-
-    monkeypatch.setattr(config, "GLOBAL_VISION_LLM_CONFIGS", [], raising=False)
-    payload = await vision_llm_routes.get_global_vision_llm_configs(user=None)
-    assert payload == []
--- a/surfsense_backend/tests/unit/routes/test_global_new_llm_configs_supports_image.py
+++ b/surfsense_backend/tests/unit/routes/test_global_new_llm_configs_supports_image.py
@ -1,106 +0,0 @@
-"""Unit tests for ``supports_image_input`` derivation on the chat global
-config endpoint (``GET /global-new-llm-configs``).
-
-Resolution order (matches ``new_llm_config_routes.get_global_new_llm_configs``):
-
-1. Explicit ``supports_image_input`` on the cfg dict (set by the YAML
-   loader for operator overrides, or by the OpenRouter integration from
-   ``architecture.input_modalities``) — wins.
-2. ``derive_supports_image_input`` helper — default-allow on unknown
-   models, only False when LiteLLM / OR modalities are definitive.
-
-The flag is purely informational at the API boundary. The streaming
-task safety net (``is_known_text_only_chat_model``) is the actual block,
-and it requires LiteLLM to *explicitly* mark the model as text-only.
-"""
-
-from __future__ import annotations
-
-import pytest
-
-pytestmark = pytest.mark.unit
-
-
-_FIXTURE: list[dict] = [
-    {
-        "id": -1,
-        "name": "GPT-4o (explicit true)",
-        "description": "vision-capable, explicit YAML override",
-        "provider": "OPENAI",
-        "model_name": "gpt-4o",
-        "api_key": "sk-test",
-        "billing_tier": "free",
-        "supports_image_input": True,
-    },
-    {
-        "id": -2,
-        "name": "DeepSeek V3 (explicit false)",
-        "description": "OpenRouter dynamic — modality-derived false",
-        "provider": "OPENROUTER",
-        "model_name": "deepseek/deepseek-v3.2-exp",
-        "api_key": "sk-or-test",
-        "api_base": "https://openrouter.ai/api/v1",
-        "billing_tier": "free",
-        "supports_image_input": False,
-    },
-    {
-        "id": -10_010,
-        "name": "Unannotated GPT-4o",
-        "description": "no flag set — resolver should derive True via LiteLLM",
-        "provider": "OPENAI",
-        "model_name": "gpt-4o",
-        "api_key": "sk-test",
-        "billing_tier": "free",
-        # supports_image_input intentionally absent
-    },
-    {
-        "id": -10_011,
-        "name": "Unannotated unknown model",
-        "description": "unmapped — default-allow True",
-        "provider": "CUSTOM",
-        "custom_provider": "brand_new_proxy",
-        "model_name": "brand-new-model-x9",
-        "api_key": "sk-test",
-        "billing_tier": "free",
-    },
-]
-
-
-@pytest.mark.asyncio
-async def test_global_new_llm_configs_emit_supports_image_input(monkeypatch):
-    """Each emitted chat config carries ``supports_image_input`` as a
-    bool. Explicit values win; unannotated entries are resolved via the
-    helper (default-allow True)."""
-    from app.config import config
-    from app.routes import new_llm_config_routes
-
-    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", _FIXTURE, raising=False)
-
-    payload = await new_llm_config_routes.get_global_new_llm_configs(user=None)
-    by_id = {c["id"]: c for c in payload}
-
-    # Auto stub: optimistic True so the user can keep Auto selected with
-    # vision-capable deployments somewhere in the pool.
-    assert 0 in by_id, "Auto stub should be emitted when configs exist"
-    assert by_id[0]["supports_image_input"] is True
-    assert by_id[0]["is_auto_mode"] is True
-
-    # Explicit True is preserved.
-    assert by_id[-1]["supports_image_input"] is True
-
-    # Explicit False is preserved (the exact failure mode the safety net
-    # guards against — DeepSeek V3 over OpenRouter would 404 with "No
-    # endpoints found that support image input").
-    assert by_id[-2]["supports_image_input"] is False
-
-    # Unannotated GPT-4o: resolver consults LiteLLM, which says vision.
-    assert by_id[-10_010]["supports_image_input"] is True
-
-    # Unknown / unmapped model: default-allow rather than pre-judge.
-    assert by_id[-10_011]["supports_image_input"] is True
-
-    for cfg in payload:
-        assert "supports_image_input" in cfg, (
-            f"supports_image_input missing from {cfg.get('id')}"
-        )
-        assert isinstance(cfg["supports_image_input"], bool)
--- a/surfsense_backend/tests/unit/routes/test_image_gen_quota.py
+++ b/surfsense_backend/tests/unit/routes/test_image_gen_quota.py
@ -27,9 +27,18 @@ async def test_resolve_billing_for_auto_mode(monkeypatch):
    from app.routes import image_generation_routes
    from app.services.billable_calls import DEFAULT_IMAGE_RESERVE_MICROS

-    search_space = SimpleNamespace(image_generation_config_id=None)
+    async def _no_auto_candidates(*_args, **_kwargs):
+        return []
+
+    monkeypatch.setattr(
+        image_generation_routes,
+        "auto_model_candidates",
+        _no_auto_candidates,
+    )
+
+    search_space = SimpleNamespace(id=1, user_id=None, image_gen_model_id=None)
    tier, model, reserve = await image_generation_routes._resolve_billing_for_image_gen(
-        session=None,  # Not consumed on this code path.
+        session=None,
        config_id=0,  # IMAGE_GEN_AUTO_MODE_ID
        search_space=search_space,
    )
@ -45,26 +54,48 @@ async def test_resolve_billing_for_premium_global_config(monkeypatch):

    monkeypatch.setattr(
        config,
-        "GLOBAL_IMAGE_GEN_CONFIGS",
+        "GLOBAL_MODELS",
        [
            {
                "id": -1,
-                "provider": "OPENAI",
-                "model_name": "gpt-image-1",
+                "connection_id": -101,
+                "model_id": "gpt-image-1",
                "billing_tier": "premium",
-                "quota_reserve_micros": 75_000,
+                "catalog": {"quota_reserve_micros": 75_000},
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
-                "model_name": "google/gemini-2.5-flash-image",
+                "connection_id": -102,
+                "model_id": "google/gemini-2.5-flash-image",
                "billing_tier": "free",
+                "catalog": {},
+            },
+        ],
+        raising=False,
+    )
+    monkeypatch.setattr(
+        config,
+        "GLOBAL_CONNECTIONS",
+        [
+            {
+                "id": -101,
+                "provider": "openai",
+                "api_key": "sk-test",
+                "base_url": None,
+                "extra": {},
+            },
+            {
+                "id": -102,
+                "provider": "openrouter",
+                "api_key": "sk-or-test",
+                "base_url": "https://openrouter.ai/api/v1",
+                "extra": {},
            },
        ],
        raising=False,
    )

-    search_space = SimpleNamespace(image_generation_config_id=None)
+    search_space = SimpleNamespace(id=1, user_id=None, image_gen_model_id=None)

    # Premium with override.
    tier, model, reserve = await image_generation_routes._resolve_billing_for_image_gen(
@ -94,7 +125,7 @@ async def test_resolve_billing_for_user_owned_byok_is_free():
    from app.routes import image_generation_routes
    from app.services.billable_calls import DEFAULT_IMAGE_RESERVE_MICROS

-    search_space = SimpleNamespace(image_generation_config_id=None)
+    search_space = SimpleNamespace(id=1, user_id=None, image_gen_model_id=None)
    tier, model, reserve = await image_generation_routes._resolve_billing_for_image_gen(
        session=None, config_id=42, search_space=search_space
    )
@ -105,7 +136,7 @@ async def test_resolve_billing_for_user_owned_byok_is_free():

@pytest.mark.asyncio
 async def test_resolve_billing_falls_back_to_search_space_default(monkeypatch):
-    """When the request omits ``image_generation_config_id``, the helper
+    """When the request omits ``image_gen_model_id``, the helper
    must consult the search space's default — so a search space pinned
    to a premium global config still gates new requests by quota.
    """
@ -114,19 +145,34 @@ async def test_resolve_billing_falls_back_to_search_space_default(monkeypatch):

    monkeypatch.setattr(
        config,
-        "GLOBAL_IMAGE_GEN_CONFIGS",
+        "GLOBAL_MODELS",
        [
            {
                "id": -7,
-                "provider": "OPENAI",
-                "model_name": "gpt-image-1",
+                "connection_id": -101,
+                "model_id": "gpt-image-1",
                "billing_tier": "premium",
+                "catalog": {},
+            }
+        ],
+        raising=False,
+    )
+    monkeypatch.setattr(
+        config,
+        "GLOBAL_CONNECTIONS",
+        [
+            {
+                "id": -101,
+                "provider": "openai",
+                "api_key": "sk-test",
+                "base_url": None,
+                "extra": {},
            }
        ],
        raising=False,
    )

-    search_space = SimpleNamespace(image_generation_config_id=-7)
+    search_space = SimpleNamespace(id=1, user_id=None, image_gen_model_id=-7)
    (
        tier,
        model,
--- a/surfsense_backend/tests/unit/services/test_agent_billing_resolver.py
+++ b/surfsense_backend/tests/unit/services/test_agent_billing_resolver.py
@ -1,27 +1,4 @@
-"""Unit tests for ``_resolve_agent_billing_for_search_space``.
-
-Validates the resolver used by Celery podcast/video tasks to compute
-``(owner_user_id, billing_tier, base_model)`` from a search space and its
-agent LLM config. The resolver mirrors chat's billing-resolution pattern at
-``stream_new_chat.py:2294-2351`` and is the single integration point that
-prevents Auto-mode podcast/video from leaking premium credit.
-
-Coverage:
-
-* Auto mode + ``thread_id`` set, pin resolves to a negative-id premium
-  global → returns ``("premium", <base_model>)``.
-* Auto mode + ``thread_id`` set, pin resolves to a negative-id free
-  global → returns ``("free", <base_model>)``.
-* Auto mode + ``thread_id`` set, pin resolves to a positive-id BYOK config
-  → always ``"free"``.
-* Auto mode + ``thread_id=None`` → fallback to ``("free", "auto")`` without
-  hitting the pin service.
-* Negative id (no Auto) → uses ``get_global_llm_config``'s
-  ``billing_tier``.
-* Positive id (user BYOK) → always ``"free"``.
-* Search space not found → raises ``ValueError``.
-* ``agent_llm_id`` is None → raises ``ValueError``.
-"""
+"""Unit tests for ``_resolve_agent_billing_for_search_space``."""

 from __future__ import annotations

@ -34,11 +11,6 @@ import pytest
 pytestmark = pytest.mark.unit


-# ---------------------------------------------------------------------------
-# Fakes
-# ---------------------------------------------------------------------------
-
-
 class _FakeExecResult:
    def __init__(self, obj):
        self._obj = obj
@ -51,14 +23,6 @@ class _FakeExecResult:


 class _FakeSession:
-    """Tiny AsyncSession stub.
-
-    ``responses`` is a list of objects to return from successive
-    ``execute()`` calls (in order). The resolver makes at most two
-    ``execute()`` calls (search-space lookup, then optionally NewLLMConfig
-    lookup), so two queued responses cover the matrix.
-    """
-
    def __init__(self, responses: list):
        self._responses = list(responses)

@ -67,9 +31,6 @@ class _FakeSession:
            return _FakeExecResult(None)
        return _FakeExecResult(self._responses.pop(0))

-    async def commit(self) -> None:
-        pass
-

@dataclass
 class _FakePinResolution:
@ -78,53 +39,33 @@ class _FakePinResolution:
    from_existing_pin: bool = False


-def _make_search_space(*, agent_llm_id: int | None, user_id: UUID) -> SimpleNamespace:
-    return SimpleNamespace(
-        id=42,
-        agent_llm_id=agent_llm_id,
-        user_id=user_id,
-    )
+def _make_search_space(*, chat_model_id: int | None, user_id: UUID) -> SimpleNamespace:
+    return SimpleNamespace(id=42, chat_model_id=chat_model_id, user_id=user_id)


-def _make_byok_config(
-    *, id_: int, base_model: str | None = None, model_name: str = "gpt-byok"
+def _make_byok_model(
+    *, id_: int, base_model: str | None = None, model_id: str = "gpt-byok"
 ) -> SimpleNamespace:
    return SimpleNamespace(
        id=id_,
-        model_name=model_name,
-        litellm_params={"base_model": base_model} if base_model else {},
+        model_id=model_id,
+        catalog={"base_model": base_model} if base_model else {},
+        connection=SimpleNamespace(enabled=True, search_space_id=42, user_id=None),
    )


-# ---------------------------------------------------------------------------
-# Tests
-# ---------------------------------------------------------------------------
-
-
@pytest.mark.asyncio
 async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
-    """Auto + thread → pin service resolves to negative-id premium config →
-    resolver returns ``("premium", <base_model>)``."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])

-    # Mock the pin service to return a concrete premium config id.
-    async def _fake_resolve_pin(
-        sess,
-        *,
-        thread_id,
-        search_space_id,
-        user_id,
-        selected_llm_config_id,
-        force_repin_free=False,
-    ):
-        assert selected_llm_config_id == 0
-        assert thread_id == 99
+    async def _fake_resolve_pin(*_args, **kwargs):
+        assert kwargs["selected_llm_config_id"] == 0
+        assert kwargs["thread_id"] == 99
        return _FakePinResolution(resolved_llm_config_id=-1, resolved_tier="premium")

-    # Mock global config lookup to return a premium entry.
    def _fake_get_global(cfg_id):
        if cfg_id == -1:
            return {
@ -135,8 +76,6 @@ async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
            }
        return None

-    # Lazy imports inside the resolver — patch the *target* modules so the
-    # imported names resolve to our fakes.
    import app.services.auto_model_pin_service as pin_module
    import app.services.llm_service as llm_module

@ -154,77 +93,18 @@ async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
    assert base_model == "gpt-5.4"


-@pytest.mark.asyncio
-async def test_auto_mode_with_thread_id_resolves_to_free_global(monkeypatch):
-    """Auto + thread → pin returns negative-id free config → resolver
-    returns ``("free", <base_model>)``. Same path the pin service takes for
-    out-of-credit users (graceful degradation)."""
-    from app.services.billable_calls import _resolve_agent_billing_for_search_space
-
-    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
-
-    async def _fake_resolve_pin(
-        sess,
-        *,
-        thread_id,
-        search_space_id,
-        user_id,
-        selected_llm_config_id,
-        force_repin_free=False,
-    ):
-        return _FakePinResolution(resolved_llm_config_id=-3, resolved_tier="free")
-
-    def _fake_get_global(cfg_id):
-        if cfg_id == -3:
-            return {
-                "id": -3,
-                "model_name": "openrouter/free-model",
-                "billing_tier": "free",
-                "litellm_params": {"base_model": "openrouter/free-model"},
-            }
-        return None
-
-    import app.services.auto_model_pin_service as pin_module
-    import app.services.llm_service as llm_module
-
-    monkeypatch.setattr(
-        pin_module, "resolve_or_get_pinned_llm_config_id", _fake_resolve_pin
-    )
-    monkeypatch.setattr(llm_module, "get_global_llm_config", _fake_get_global)
-
-    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
-        session, search_space_id=42, thread_id=99
-    )
-
-    assert owner == user_id
-    assert tier == "free"
-    assert base_model == "openrouter/free-model"
-
-
@pytest.mark.asyncio
 async def test_auto_mode_with_thread_id_resolves_to_byok_is_free(monkeypatch):
-    """Auto + thread → pin returns positive-id BYOK config → resolver
-    returns ``("free", ...)`` (BYOK is always free per
-    ``AgentConfig.from_new_llm_config``)."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    search_space = _make_search_space(agent_llm_id=0, user_id=user_id)
-    byok_cfg = _make_byok_config(
-        id_=17, base_model="anthropic/claude-3-haiku", model_name="my-claude"
+    search_space = _make_search_space(chat_model_id=0, user_id=user_id)
+    byok_model = _make_byok_model(
+        id_=17, base_model="anthropic/claude-3-haiku", model_id="my-claude"
    )
-    session = _FakeSession([search_space, byok_cfg])
+    session = _FakeSession([search_space, byok_model])

-    async def _fake_resolve_pin(
-        sess,
-        *,
-        thread_id,
-        search_space_id,
-        user_id,
-        selected_llm_config_id,
-        force_repin_free=False,
-    ):
+    async def _fake_resolve_pin(*_args, **_kwargs):
        return _FakePinResolution(resolved_llm_config_id=17, resolved_tier="free")

    import app.services.auto_model_pin_service as pin_module
@ -244,13 +124,10 @@ async def test_auto_mode_with_thread_id_resolves_to_byok_is_free(monkeypatch):

@pytest.mark.asyncio
 async def test_auto_mode_without_thread_id_falls_back_to_free():
-    """Auto + ``thread_id=None`` → ``("free", "auto")`` without invoking
-    the pin service. Forward-compat fallback for any future direct-API
-    entrypoint that doesn't have a chat thread."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])

    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
        session, search_space_id=42, thread_id=None
@ -263,13 +140,10 @@ async def test_auto_mode_without_thread_id_falls_back_to_free():

@pytest.mark.asyncio
 async def test_auto_mode_pin_failure_falls_back_to_free(monkeypatch):
-    """If the pin service raises ``ValueError`` (thread missing /
-    mismatched search space), the resolver should log and return free
-    rather than killing the whole task."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])

    async def _fake_resolve_pin(*args, **kwargs):
        raise ValueError("thread missing")
@ -291,12 +165,10 @@ async def test_auto_mode_pin_failure_falls_back_to_free(monkeypatch):

@pytest.mark.asyncio
 async def test_negative_id_premium_global_returns_premium(monkeypatch):
-    """Explicit negative agent_llm_id → ``get_global_llm_config`` →
-    return its ``billing_tier``."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=-1, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=-1, user_id=user_id)])

    def _fake_get_global(cfg_id):
        return {
@ -319,50 +191,15 @@ async def test_negative_id_premium_global_returns_premium(monkeypatch):
    assert base_model == "gpt-5.4"


-@pytest.mark.asyncio
-async def test_negative_id_free_global_returns_free(monkeypatch):
-    from app.services.billable_calls import _resolve_agent_billing_for_search_space
-
-    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=-2, user_id=user_id)])
-
-    def _fake_get_global(cfg_id):
-        return {
-            "id": cfg_id,
-            "model_name": "openrouter/some-free",
-            "billing_tier": "free",
-            "litellm_params": {"base_model": "openrouter/some-free"},
-        }
-
-    import app.services.llm_service as llm_module
-
-    monkeypatch.setattr(llm_module, "get_global_llm_config", _fake_get_global)
-
-    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
-        session, search_space_id=42, thread_id=None
-    )
-
-    assert owner == user_id
-    assert tier == "free"
-    assert base_model == "openrouter/some-free"
-
-
@pytest.mark.asyncio
 async def test_negative_id_missing_base_model_falls_back_to_model_name(monkeypatch):
-    """When the global config has no ``litellm_params.base_model``, the
-    resolver falls back to ``model_name`` — matching chat's behavior."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=-5, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=-5, user_id=user_id)])

    def _fake_get_global(cfg_id):
-        return {
-            "id": cfg_id,
-            "model_name": "fallback-model",
-            "billing_tier": "premium",
-            # No litellm_params.
-        }
+        return {"id": cfg_id, "model_name": "fallback-model", "billing_tier": "premium"}

    import app.services.llm_service as llm_module

@ -378,14 +215,12 @@ async def test_negative_id_missing_base_model_falls_back_to_model_name(monkeypat

@pytest.mark.asyncio
 async def test_positive_id_byok_is_always_free():
-    """Positive agent_llm_id → user-owned BYOK NewLLMConfig → always free,
-    regardless of underlying provider tier."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    search_space = _make_search_space(agent_llm_id=23, user_id=user_id)
-    byok_cfg = _make_byok_config(id_=23, base_model="anthropic/claude-3.5-sonnet")
-    session = _FakeSession([search_space, byok_cfg])
+    search_space = _make_search_space(chat_model_id=23, user_id=user_id)
+    byok_model = _make_byok_model(id_=23, base_model="anthropic/claude-3.5-sonnet")
+    session = _FakeSession([search_space, byok_model])

    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
        session, search_space_id=42
@ -398,13 +233,10 @@ async def test_positive_id_byok_is_always_free():

@pytest.mark.asyncio
 async def test_positive_id_byok_missing_returns_free_with_empty_base_model():
-    """If the BYOK config row is missing/deleted but the search space still
-    points at it, the resolver still returns free (no debit) with an empty
-    base_model — billable_call's premium path is skipped, no harm done."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=99, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=99, user_id=user_id)])

    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
        session, search_space_id=42
@ -419,18 +251,18 @@ async def test_positive_id_byok_missing_returns_free_with_empty_base_model():
 async def test_search_space_not_found_raises_value_error():
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

-    session = _FakeSession([None])
-
    with pytest.raises(ValueError, match="Search space"):
-        await _resolve_agent_billing_for_search_space(session, search_space_id=999)
+        await _resolve_agent_billing_for_search_space(
+            _FakeSession([None]), search_space_id=999
+        )


@pytest.mark.asyncio
-async def test_agent_llm_id_none_raises_value_error():
+async def test_chat_model_id_none_raises_value_error():
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=None, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=None, user_id=user_id)])

-    with pytest.raises(ValueError, match="agent_llm_id"):
+    with pytest.raises(ValueError, match="chat_model_id"):
        await _resolve_agent_billing_for_search_space(session, search_space_id=42)
--- a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
+++ b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
@ -17,8 +17,39 @@ from app.services.auto_model_pin_service import (
 pytestmark = pytest.mark.unit


+class _FakeRedis:
+    def __init__(self):
+        self.values: dict[str, str] = {}
+        self.ttls: dict[str, int] = {}
+
+    def set(self, key: str, value: str, *, ex: int | None = None):
+        self.values[key] = value
+        if ex is not None:
+            self.ttls[key] = ex
+        return True
+
+    def mget(self, keys: list[str]):
+        return [self.values.get(key) for key in keys]
+
+    def delete(self, *keys: str):
+        removed = 0
+        for key in keys:
+            if key in self.values:
+                removed += 1
+            self.values.pop(key, None)
+            self.ttls.pop(key, None)
+        return removed
+
+    def scan_iter(self, pattern: str):
+        prefix = pattern.removesuffix("*")
+        return (key for key in list(self.values) if key.startswith(prefix))
+
+
@pytest.fixture(autouse=True)
-def _clear_runtime_cooldown_map():
+def _clear_runtime_cooldown_map(monkeypatch):
+    import app.services.auto_model_pin_service as svc
+
+    monkeypatch.setattr(svc, "_runtime_cooldown_redis", _FakeRedis())
    clear_runtime_cooldown()
    clear_healthy()
    yield
@ -32,8 +63,9 @@ class _FakeQuotaResult:


 class _FakeExecResult:
-    def __init__(self, thread):
+    def __init__(self, *, thread=None, scalars=None):
        self._thread = thread
+        self._scalars = scalars or []

    def unique(self):
        return self
@ -41,19 +73,71 @@ class _FakeExecResult:
    def scalar_one_or_none(self):
        return self._thread

+    def scalars(self):
+        return SimpleNamespace(all=lambda: self._scalars)
+

 class _FakeSession:
-    def __init__(self, thread):
+    def __init__(self, thread, *, models=None):
        self.thread = thread
+        self.models = models or []
        self.commit_count = 0
+        self.execute_count = 0

    async def execute(self, _stmt):
-        return _FakeExecResult(self.thread)
+        self.execute_count += 1
+        if self.execute_count == 1:
+            return _FakeExecResult(thread=self.thread)
+        return _FakeExecResult(scalars=self.models)

    async def commit(self):
        self.commit_count += 1


+def _set_global_llm_configs(monkeypatch, config, configs: list[dict]):
+    """Expand flat legacy-style cfg dicts into the connection/model catalog lists.
+
+    For every entry in ``configs`` one connection dict and one model dict are
+    built; the connection id is the model id shifted down by 100_000. The raw
+    list and both derived lists are then patched onto ``config`` as
+    ``GLOBAL_LLM_CONFIGS``, ``GLOBAL_CONNECTIONS`` and ``GLOBAL_MODELS``.
+    """
+    capability_defaults = (
+        ("supports_chat", True),
+        ("supports_image_input", True),
+        ("supports_tools", True),
+        ("supports_image_generation", False),
+    )
+    connections: list[dict] = []
+    models: list[dict] = []
+
+    for legacy in configs:
+        model_pk = int(legacy["id"])
+        link_id = model_pk - 100_000
+        # Either key may carry the provider; "provider" wins when it is truthy.
+        vendor = legacy.get("provider") or legacy.get("litellm_provider")
+        name = legacy["model_name"]
+
+        connections.append(
+            {"id": link_id, "provider": vendor, "scope": "GLOBAL", "enabled": True}
+        )
+
+        entry = {
+            "id": model_pk,
+            "connection_id": link_id,
+            "model_id": name,
+            "display_name": legacy.get("name") or name,
+        }
+        for flag, fallback in capability_defaults:
+            entry[flag] = legacy.get(flag, fallback)
+        entry["capabilities_override"] = legacy.get("capabilities_override") or {}
+        entry["billing_tier"] = legacy.get("billing_tier", "free")
+        # A falsy quality_score falls back to quality_score_static.
+        score = legacy.get("quality_score") or legacy.get("quality_score_static")
+        entry["catalog"] = {
+            "auto_pin_tier": legacy.get("auto_pin_tier"),
+            "quality_score": score,
+        }
+        models.append(entry)
+
+    for attr, payload in (
+        ("GLOBAL_LLM_CONFIGS", configs),
+        ("GLOBAL_CONNECTIONS", connections),
+        ("GLOBAL_MODELS", models),
+    ):
+        monkeypatch.setattr(config, attr, payload)
+
+
 def _thread(
    *,
    search_space_id: int = 10,
@ -71,14 +155,19 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
-            {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
+            {
+                "id": -2,
+                "litellm_provider": "openai",
+                "model_name": "gpt-free",
+                "api_key": "k1",
+            },
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@ -111,13 +200,13 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -2,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-free",
                "api_key": "k1",
                "billing_tier": "free",
@ -125,7 +214,7 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
            },
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@ -154,17 +243,19 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):


@pytest.mark.asyncio
-async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
+async def test_premium_eligible_auto_uses_quality_pool_not_single_preferred_model(
+    monkeypatch,
+):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5.1",
                "api_key": "k1",
                "billing_tier": "premium",
@ -173,7 +264,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5.4",
                "api_key": "k2",
                "billing_tier": "premium",
@ -182,12 +273,39 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
            },
            {
                "id": -3,
-                "provider": "OPENROUTER",
-                "model_name": "openai/gpt-5.4",
+                "litellm_provider": "anthropic",
+                "model_name": "claude-opus",
                "api_key": "k3",
                "billing_tier": "premium",
-                "auto_pin_tier": "B",
-                "quality_score": 100,
+                "auto_pin_tier": "A",
+                "quality_score": 99,
+            },
+            {
+                "id": -4,
+                "litellm_provider": "openai",
+                "model_name": "gpt-5.3",
+                "api_key": "k4",
+                "billing_tier": "premium",
+                "auto_pin_tier": "A",
+                "quality_score": 98,
+            },
+            {
+                "id": -5,
+                "litellm_provider": "gemini",
+                "model_name": "gemini-3-pro",
+                "api_key": "k5",
+                "billing_tier": "premium",
+                "auto_pin_tier": "A",
+                "quality_score": 97,
+            },
+            {
+                "id": -6,
+                "litellm_provider": "xai",
+                "model_name": "grok-5",
+                "api_key": "k6",
+                "billing_tier": "premium",
+                "auto_pin_tier": "A",
+                "quality_score": 96,
            },
        ],
    )
@ -207,7 +325,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
        user_id="00000000-0000-0000-0000-000000000001",
        selected_llm_config_id=0,
    )
-    assert result.resolved_llm_config_id == -2
+    assert result.resolved_llm_config_id in {-1, -3, -4, -5, -6}
    assert result.resolved_tier == "premium"


@ -216,13 +334,13 @@ async def test_next_turn_reuses_existing_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@ -257,13 +375,13 @@ async def test_premium_eligible_auto_can_pin_premium(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@ -295,20 +413,20 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -2,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-free",
                "api_key": "k1",
                "billing_tier": "free",
            },
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@ -340,20 +458,20 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -2,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-free",
                "api_key": "k1",
                "billing_tier": "free",
            },
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@ -385,20 +503,20 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -2,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-free",
                "api_key": "k1",
                "billing_tier": "free",
            },
            {
                "id": -1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-prem",
                "api_key": "k2",
                "billing_tier": "premium",
@ -433,11 +551,16 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-2))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
-            {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
+            {
+                "id": -2,
+                "litellm_provider": "openai",
+                "model_name": "gpt-free",
+                "api_key": "k1",
+            },
        ],
    )

@ -458,11 +581,16 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-999))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
-            {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
+            {
+                "id": -2,
+                "litellm_provider": "openai",
+                "model_name": "gpt-free",
+                "api_key": "k1",
+            },
        ],
    )

@ -487,7 +615,7 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):


 # ---------------------------------------------------------------------------
-# Quality-aware pin selection (Auto Fastest upgrade)
+# Quality-aware pin selection (Auto upgrade)
 # ---------------------------------------------------------------------------


@ -498,13 +626,13 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "venice/dead-model",
                "api_key": "k1",
                "billing_tier": "free",
@ -514,7 +642,7 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemini-flash",
                "api_key": "k1",
                "billing_tier": "free",
@ -550,13 +678,13 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5",
                "api_key": "k-yaml",
                "billing_tier": "premium",
@ -566,7 +694,7 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "openai/gpt-5",
                "api_key": "k-or",
                "billing_tier": "premium",
@ -602,13 +730,13 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5",
                "api_key": "k-yaml",
                "billing_tier": "premium",
@ -618,7 +746,7 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemini-flash:free",
                "api_key": "k-or",
                "billing_tier": "free",
@ -656,7 +784,7 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
    high_score_cfgs = [
        {
            "id": -i,
-            "provider": "AZURE_OPENAI",
+            "litellm_provider": "azure",
            "model_name": f"gpt-x-{i}",
            "api_key": "k",
            "billing_tier": "premium",
@ -668,7 +796,7 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
    ]
    low_score_trap = {
        "id": -99,
-        "provider": "AZURE_OPENAI",
+        "litellm_provider": "azure",
        "model_name": "tiny-legacy",
        "api_key": "k",
        "billing_tier": "premium",
@ -676,9 +804,9 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
        "quality_score": 10,
        "health_gated": False,
    }
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [*high_score_cfgs, low_score_trap],
    )

@ -723,13 +851,13 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "venice/dead-model",
                "api_key": "k",
                "billing_tier": "premium",
@ -739,7 +867,7 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5",
                "api_key": "k",
                "billing_tier": "premium",
@ -775,13 +903,13 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5",
                "api_key": "k",
                "billing_tier": "premium",
@ -791,7 +919,7 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5-pro",
                "api_key": "k",
                "billing_tier": "premium",
@ -833,13 +961,13 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemma-4-26b-a4b-it:free",
                "api_key": "k",
                "billing_tier": "free",
@ -849,7 +977,7 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemini-2.5-flash:free",
                "api_key": "k",
                "billing_tier": "free",
@ -881,18 +1009,86 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
    assert result.from_existing_pin is False


+def test_mark_runtime_cooldown_writes_shared_redis(monkeypatch):
+    """Marking a cooldown records the reason and TTL under the per-config key."""
+    import app.services.auto_model_pin_service as svc
+
+    cooldown_key = "auto:cooldown:llm:-9"
+
+    mark_runtime_cooldown(-9, reason="provider_rate_limited", cooldown_seconds=123)
+
+    # Inspect whatever client the service module currently holds.
+    store = svc._runtime_cooldown_redis
+    recorded_reason = store.values[cooldown_key]
+    assert recorded_reason == "provider_rate_limited"
+    recorded_ttl = store.ttls[cooldown_key]
+    assert recorded_ttl == 123
+
+
+@pytest.mark.asyncio
+async def test_shared_runtime_cooldown_blocks_pin_across_workers(monkeypatch):
+    """A cooldown key already present in Redis must invalidate the thread's pin.
+
+    The cooldown is written straight into the Redis client instead of through
+    ``mark_runtime_cooldown``, imitating an entry created by another worker.
+    The thread is pinned to config -1, so resolution is expected to move to -2.
+    """
+    import app.services.auto_model_pin_service as svc
+    from app.config import config
+
+    def _free_openrouter_cfg(cfg_id: int, model_name: str, quality: int) -> dict:
+        # Both candidates differ only in id, model name and quality score.
+        return {
+            "id": cfg_id,
+            "litellm_provider": "openrouter",
+            "model_name": model_name,
+            "api_key": "k",
+            "billing_tier": "free",
+            "auto_pin_tier": "C",
+            "quality_score": quality,
+            "health_gated": False,
+        }
+
+    async def _deny_premium(*_args, **_kwargs):
+        return _FakeQuotaResult(allowed=False)
+
+    session = _FakeSession(_thread(pinned_llm_config_id=-1))
+    candidates = [
+        _free_openrouter_cfg(-1, "google/gemma-4-26b-a4b-it:free", 90),
+        _free_openrouter_cfg(-2, "google/gemini-2.5-flash:free", 80),
+    ]
+    _set_global_llm_configs(monkeypatch, config, candidates)
+
+    # Seed the cooldown for the pinned config directly in the Redis client.
+    svc._runtime_cooldown_redis.set(
+        "auto:cooldown:llm:-1", "provider_rate_limited", ex=600
+    )
+
+    monkeypatch.setattr(
+        "app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
+        _deny_premium,
+    )
+
+    outcome = await resolve_or_get_pinned_llm_config_id(
+        session,
+        thread_id=1,
+        search_space_id=10,
+        user_id="00000000-0000-0000-0000-000000000001",
+        selected_llm_config_id=0,
+    )
+
+    assert outcome.resolved_llm_config_id == -2
+    assert outcome.from_existing_pin is False
+
+
@pytest.mark.asyncio
 async def test_clearing_runtime_cooldown_restores_pin_reuse(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemma-4-26b-a4b-it:free",
                "api_key": "k",
                "billing_tier": "free",
@ -931,13 +1127,13 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemma-4-26b-a4b-it:free",
                "api_key": "k",
                "billing_tier": "free",
@ -947,7 +1143,7 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
            },
            {
                "id": -2,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "google/gemini-2.5-flash:free",
                "api_key": "k",
                "billing_tier": "free",
--- a/surfsense_backend/tests/unit/services/test_auto_pin_image_aware.py
+++ b/surfsense_backend/tests/unit/services/test_auto_pin_image_aware.py
@ -45,8 +45,9 @@ class _FakeQuotaResult:


 class _FakeExecResult:
-    def __init__(self, thread):
+    def __init__(self, *, thread=None, scalars=None):
        self._thread = thread
+        self._scalars = scalars or []

    def unique(self):
        return self
@ -54,14 +55,21 @@ class _FakeExecResult:
    def scalar_one_or_none(self):
        return self._thread

+    def scalars(self):
+        return SimpleNamespace(all=lambda: self._scalars)
+

 class _FakeSession:
    def __init__(self, thread):
        self.thread = thread
        self.commit_count = 0
+        self.execute_count = 0

    async def execute(self, _stmt):
-        return _FakeExecResult(self.thread)
+        self.execute_count += 1
+        if self.execute_count == 1:
+            return _FakeExecResult(thread=self.thread)
+        return _FakeExecResult(scalars=[])

    async def commit(self):
        self.commit_count += 1
@ -71,10 +79,64 @@ def _thread(*, pinned: int | None = None):
    return SimpleNamespace(id=1, search_space_id=10, pinned_llm_config_id=pinned)


+def _set_global_llm_configs(monkeypatch, config, configs: list[dict]):
+    """Patch ``config`` with catalog lists derived from flat cfg dicts.
+
+    Entries lacking a ``supports_image_input`` key get one filled in (the cfg
+    dict itself is updated) via ``derive_supports_image_input``. Each entry
+    then yields one connection dict and one model dict, and the three lists
+    are patched onto ``config`` as ``GLOBAL_LLM_CONFIGS``,
+    ``GLOBAL_CONNECTIONS`` and ``GLOBAL_MODELS``.
+    """
+    from app.services.provider_capabilities import derive_supports_image_input
+
+    connections: list[dict] = []
+    models: list[dict] = []
+
+    for legacy in configs:
+        model_pk = int(legacy["id"])
+        link_id = model_pk - 100_000
+        # Either key may carry the provider; "provider" wins when it is truthy.
+        vendor = legacy.get("provider") or legacy.get("litellm_provider")
+        name = legacy["model_name"]
+
+        if "supports_image_input" not in legacy:
+            params = legacy.get("litellm_params") or {}
+            # Only a dict-shaped litellm_params can supply a base model.
+            base = None
+            if isinstance(params, dict):
+                base = params.get("base_model")
+            legacy["supports_image_input"] = derive_supports_image_input(
+                provider=vendor,
+                model_name=name,
+                base_model=base,
+                custom_provider=legacy.get("custom_provider"),
+            )
+
+        connections.append(
+            {"id": link_id, "provider": vendor, "scope": "GLOBAL", "enabled": True}
+        )
+
+        entry = {
+            "id": model_pk,
+            "connection_id": link_id,
+            "model_id": name,
+            "display_name": legacy.get("name") or name,
+        }
+        for flag, fallback in (
+            ("supports_chat", True),
+            ("supports_tools", True),
+            ("supports_image_generation", False),
+        ):
+            entry[flag] = legacy.get(flag, fallback)
+        entry["capabilities_override"] = legacy.get("capabilities_override") or {}
+        entry["billing_tier"] = legacy.get("billing_tier", "free")
+        entry["catalog"] = {
+            "auto_pin_tier": legacy.get("auto_pin_tier"),
+            "quality_score": legacy.get("quality_score"),
+        }
+        entry["supports_image_input"] = legacy["supports_image_input"]
+        models.append(entry)
+
+    for attr, payload in (
+        ("GLOBAL_LLM_CONFIGS", configs),
+        ("GLOBAL_CONNECTIONS", connections),
+        ("GLOBAL_MODELS", models),
+    ):
+        monkeypatch.setattr(config, attr, payload)
+
+
 def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
    return {
        "id": id_,
-        "provider": "OPENAI",
+        "litellm_provider": "openai",
        "model_name": f"vision-{id_}",
        "api_key": "k",
        "billing_tier": tier,
@ -87,7 +149,7 @@ def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
 def _text_only_cfg(id_: int, *, tier: str = "free", quality: int = 90) -> dict:
    return {
        "id": id_,
-        "provider": "OPENAI",
+        "litellm_provider": "openai",
        "model_name": f"text-{id_}",
        "api_key": "k",
        "billing_tier": tier,
@ -108,11 +170,7 @@ async def test_image_turn_filters_out_text_only_candidates(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
-        config,
-        "GLOBAL_LLM_CONFIGS",
-        [_text_only_cfg(-1), _vision_cfg(-2)],
-    )
+    _set_global_llm_configs(monkeypatch, config, [_text_only_cfg(-1), _vision_cfg(-2)])
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
        _premium_allowed,
@ -140,11 +198,7 @@ async def test_image_turn_force_repins_stale_text_only_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned=-1))
-    monkeypatch.setattr(
-        config,
-        "GLOBAL_LLM_CONFIGS",
-        [_text_only_cfg(-1), _vision_cfg(-2)],
-    )
+    _set_global_llm_configs(monkeypatch, config, [_text_only_cfg(-1), _vision_cfg(-2)])
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
        _premium_allowed,
@ -172,9 +226,9 @@ async def test_image_turn_reuses_existing_vision_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned=-2))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2), _vision_cfg(-3, quality=70)],
    )
    monkeypatch.setattr(
@ -203,10 +257,8 @@ async def test_image_turn_with_no_vision_candidates_raises(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
-        config,
-        "GLOBAL_LLM_CONFIGS",
-        [_text_only_cfg(-1), _text_only_cfg(-2)],
+    _set_global_llm_configs(
+        monkeypatch, config, [_text_only_cfg(-1), _text_only_cfg(-2)]
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
@ -231,11 +283,7 @@ async def test_non_image_turn_keeps_text_only_in_pool(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
-        config,
-        "GLOBAL_LLM_CONFIGS",
-        [_text_only_cfg(-1)],
-    )
+    _set_global_llm_configs(monkeypatch, config, [_text_only_cfg(-1)])
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
        _premium_allowed,
@ -261,7 +309,7 @@ async def test_image_turn_unannotated_cfg_resolves_via_helper(monkeypatch):
    session = _FakeSession(_thread())
    cfg_unannotated_vision = {
        "id": -2,
-        "provider": "OPENAI",
+        "litellm_provider": "openai",
        "model_name": "gpt-4o",  # known vision model in LiteLLM map
        "api_key": "k",
        "billing_tier": "free",
@ -269,7 +317,7 @@ async def test_image_turn_unannotated_cfg_resolves_via_helper(monkeypatch):
        "quality_score": 80,
        # NOTE: no supports_image_input key
    }
-    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [cfg_unannotated_vision])
+    _set_global_llm_configs(monkeypatch, config, [cfg_unannotated_vision])
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
        _premium_allowed,
--- a/surfsense_backend/tests/unit/services/test_image_gen_api_base_defense.py
+++ b/surfsense_backend/tests/unit/services/test_image_gen_api_base_defense.py
@ -1,19 +1,4 @@
-"""Defense-in-depth: image-gen call sites must not let an empty
-``api_base`` fall through to LiteLLM's module-global ``litellm.api_base``.
-
-The bug repro: an OpenRouter image-gen config ships
-``api_base=""``. The pre-fix call site in
-``image_generation_routes._execute_image_generation`` did
-``if cfg.get("api_base"): kwargs["api_base"] = cfg["api_base"]`` which
-silently dropped the empty string. LiteLLM then fell back to
-``litellm.api_base`` (commonly inherited from ``AZURE_OPENAI_ENDPOINT``)
-and OpenRouter's ``image_generation/transformation`` appended
-``/chat/completions`` to it → 404 ``Resource not found``.
-
-This test pins the post-fix behaviour: with an empty ``api_base`` in
-the config, the call site MUST set ``api_base`` to OpenRouter's public
-URL instead of leaving it unset.
-"""
+"""Image-gen call sites must pass each config's explicit ``api_base``."""

 from __future__ import annotations

@ -26,22 +11,23 @@ pytestmark = pytest.mark.unit


@pytest.mark.asyncio
-async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
-    """The global-config branch (``config_id < 0``) of
-    ``_execute_image_generation`` must apply the resolver and pin
-    ``api_base`` to OpenRouter when the config ships an empty string.
-    """
+async def test_global_openrouter_image_gen_sets_explicit_api_base():
+    """The global-config branch forwards the explicit OpenRouter base."""
    from app.routes import image_generation_routes

-    cfg = {
+    global_model = {
        "id": -20_001,
-        "name": "GPT Image 1 (OpenRouter)",
-        "provider": "OPENROUTER",
-        "model_name": "openai/gpt-image-1",
+        "connection_id": -101,
+        "model_id": "openai/gpt-image-1",
+        "supports_image_generation": True,
+        "capabilities_override": {},
+    }
+    global_connection = {
+        "id": -101,
+        "provider": "openrouter",
        "api_key": "sk-or-test",
-        "api_base": "",  # the original bug shape
-        "api_version": None,
-        "litellm_params": {},
+        "base_url": "https://openrouter.ai/api/v1",
+        "extra": {},
    }

    captured: dict = {}
@ -51,7 +37,7 @@ async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
        return MagicMock(model_dump=lambda: {"data": []}, _hidden_params={})

    image_gen = MagicMock()
-    image_gen.image_generation_config_id = cfg["id"]
+    image_gen.image_gen_model_id = global_model["id"]
    image_gen.prompt = "test"
    image_gen.n = 1
    image_gen.quality = None
@ -61,14 +47,19 @@ async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
    image_gen.model = None

    search_space = MagicMock()
-    search_space.image_generation_config_id = cfg["id"]
+    search_space.image_gen_model_id = global_model["id"]
    session = MagicMock()

    with (
        patch.object(
            image_generation_routes,
-            "_get_global_image_gen_config",
-            return_value=cfg,
+            "_get_global_model",
+            return_value=global_model,
+        ),
+        patch.object(
+            image_generation_routes,
+            "_get_global_connection",
+            return_value=global_connection,
        ),
        patch.object(
            image_generation_routes,
@ -80,30 +71,31 @@ async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
            session=session, image_gen=image_gen, search_space=search_space
        )

-    # The whole point of the fix: even with empty ``api_base`` in the
-    # config, we forward OpenRouter's public URL so the call doesn't
-    # inherit an Azure endpoint.
    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-image-1"


@pytest.mark.asyncio
-async def test_generate_image_tool_global_sets_api_base_when_config_empty():
-    """Same defense at the agent tool entry point — both surfaces share
+async def test_generate_image_tool_global_sets_explicit_api_base():
+    """Same explicit-base behavior at the agent tool entry point — both surfaces share
    the same OpenRouter config payloads."""
    from app.agents.chat.multi_agent_chat.subagents.builtins.deliverables.tools import (
        generate_image as gi_module,
    )

-    cfg = {
+    global_model = {
        "id": -20_001,
-        "name": "GPT Image 1 (OpenRouter)",
-        "provider": "OPENROUTER",
-        "model_name": "openai/gpt-image-1",
+        "connection_id": -101,
+        "model_id": "openai/gpt-image-1",
+        "supports_image_generation": True,
+        "capabilities_override": {},
+    }
+    global_connection = {
+        "id": -101,
+        "provider": "openrouter",
        "api_key": "sk-or-test",
-        "api_base": "",
-        "api_version": None,
-        "litellm_params": {},
+        "base_url": "https://openrouter.ai/api/v1",
+        "extra": {},
    }

    captured: dict = {}
@ -119,7 +111,7 @@ async def test_generate_image_tool_global_sets_api_base_when_config_empty():

    search_space = MagicMock()
    search_space.id = 1
-    search_space.image_generation_config_id = cfg["id"]
+    search_space.image_gen_model_id = global_model["id"]

    session_cm = AsyncMock()
    session = AsyncMock()
@ -142,7 +134,10 @@ async def test_generate_image_tool_global_sets_api_base_when_config_empty():

    with (
        patch.object(gi_module, "shielded_async_session", return_value=session_cm),
-        patch.object(gi_module, "_get_global_image_gen_config", return_value=cfg),
+        patch.object(gi_module, "_get_global_model", return_value=global_model),
+        patch.object(
+            gi_module, "_get_global_connection", return_value=global_connection
+        ),
        patch.object(
            gi_module, "aimage_generation", side_effect=fake_aimage_generation
        ),
@ -171,20 +166,16 @@ async def test_generate_image_tool_global_sets_api_base_when_config_empty():
    assert captured["model"] == "openrouter/openai/gpt-image-1"


-def test_image_gen_router_deployment_sets_api_base_when_config_empty():
-    """The Auto-mode router pool must also resolve ``api_base`` when an
-    OpenRouter config ships an empty string. The deployment dict is fed
-    straight to ``litellm.Router``, so a missing ``api_base`` would
-    leak the same way as the direct call sites.
-    """
+def test_image_gen_router_deployment_sets_explicit_api_base():
+    """The Auto-mode router pool carries explicit api_base into deployments."""
    from app.services.image_gen_router_service import ImageGenRouterService

    deployment = ImageGenRouterService._config_to_deployment(
        {
            "model_name": "openai/gpt-image-1",
-            "provider": "OPENROUTER",
+            "litellm_provider": "openrouter",
            "api_key": "sk-or-test",
-            "api_base": "",
+            "api_base": "https://openrouter.ai/api/v1",
        }
    )
    assert deployment is not None
--- a/surfsense_backend/tests/unit/services/test_llm_router_pool_filter.py
+++ b/surfsense_backend/tests/unit/services/test_llm_router_pool_filter.py
@ -25,10 +25,10 @@ def _fake_yaml_config(
    return {
        "id": id,
        "name": f"yaml-{id}",
-        "provider": "OPENAI",
+        "litellm_provider": "openai",
        "model_name": model_name,
        "api_key": "sk-test",
-        "api_base": "",
+        "api_base": "https://api.openai.com/v1",
        "billing_tier": billing_tier,
        "rpm": 100,
        "tpm": 100_000,
@ -54,10 +54,10 @@ def _fake_openrouter_config(
    return {
        "id": id,
        "name": f"or-{id}",
-        "provider": "OPENROUTER",
+        "litellm_provider": "openrouter",
        "model_name": model_name,
        "api_key": "sk-or-test",
-        "api_base": "",
+        "api_base": "https://openrouter.ai/api/v1",
        "billing_tier": billing_tier,
        "rpm": 20 if billing_tier == "free" else 200,
        "tpm": 100_000 if billing_tier == "free" else 1_000_000,
@ -217,10 +217,64 @@ def test_auto_model_pin_candidates_include_dynamic_openrouter():
        model_name="meta-llama/llama-3.3-70b:free",
        billing_tier="free",
    )
-    original = config.GLOBAL_LLM_CONFIGS
+    global_connections = [
+        {
+            "id": -110_001,
+            "provider": "openrouter",
+            "scope": "GLOBAL",
+            "enabled": True,
+        },
+        {
+            "id": -110_002,
+            "provider": "openrouter",
+            "scope": "GLOBAL",
+            "enabled": True,
+        },
+    ]
+    global_models = [
+        {
+            "id": or_premium["id"],
+            "connection_id": -110_001,
+            "model_id": or_premium["model_name"],
+            "display_name": or_premium["name"],
+            "supports_chat": True,
+            "supports_image_input": True,
+            "supports_tools": True,
+            "supports_image_generation": False,
+            "capabilities_override": {},
+            "billing_tier": or_premium["billing_tier"],
+            "catalog": {
+                "auto_pin_tier": "A",
+                "quality_score": 50,
+            },
+        },
+        {
+            "id": or_free["id"],
+            "connection_id": -110_002,
+            "model_id": or_free["model_name"],
+            "display_name": or_free["name"],
+            "supports_chat": True,
+            "supports_image_input": True,
+            "supports_tools": True,
+            "supports_image_generation": False,
+            "capabilities_override": {},
+            "billing_tier": or_free["billing_tier"],
+            "catalog": {
+                "auto_pin_tier": "A",
+                "quality_score": 50,
+            },
+        },
+    ]
+    original_configs = config.GLOBAL_LLM_CONFIGS
+    original_connections = config.GLOBAL_CONNECTIONS
+    original_models = config.GLOBAL_MODELS
    try:
        config.GLOBAL_LLM_CONFIGS = [or_premium, or_free]
+        config.GLOBAL_CONNECTIONS = global_connections
+        config.GLOBAL_MODELS = global_models
        candidate_ids = {c["id"] for c in _global_candidates()}
        assert candidate_ids == {-10_001, -10_002}
    finally:
-        config.GLOBAL_LLM_CONFIGS = original
+        config.GLOBAL_LLM_CONFIGS = original_configs
+        config.GLOBAL_CONNECTIONS = original_connections
+        config.GLOBAL_MODELS = original_models
--- a/surfsense_backend/tests/unit/services/test_model_connections.py
+++ b/surfsense_backend/tests/unit/services/test_model_connections.py
@ -0,0 +1,78 @@
+from app.services.global_model_catalog import materialize_global_model_catalog
+from app.services.model_resolver import ensure_v1, to_litellm
+
+
+def test_openai_compatible_resolver_uses_explicit_api_base() -> None:
+    """An OpenAI-compatible connection forwards its own base URL and key.
+
+    The resolved model id is expected to carry the ``openai/`` prefix, and
+    ``ensure_v1`` is expected to leave a URL that already ends in ``/v1``
+    untouched.
+    """
+    connection = {
+        "protocol": "OPENAI_COMPATIBLE",
+        "provider": "openai",
+        "base_url": "http://host.docker.internal:1234/v1",
+        "api_key": "local-key",
+        "extra": {},
+    }
+
+    resolved_model, call_kwargs = to_litellm(connection, "qwen/qwen3")
+
+    assert resolved_model == "openai/qwen/qwen3"
+    assert call_kwargs["api_base"] == "http://host.docker.internal:1234/v1"
+    assert call_kwargs["api_key"] == "local-key"
+
+    # Already-versioned URLs must round-trip unchanged.
+    already_versioned = "http://example.com/v1"
+    assert ensure_v1(already_versioned) == already_versioned
+
+
+def test_ollama_resolver_uses_native_api_base() -> None:
+    """An Ollama connection keeps its base URL exactly as configured.
+
+    The resolved model id is expected to be prefixed with ``ollama_chat/``
+    and the ``api_base`` kwarg must equal the connection's ``base_url``.
+    """
+    connection = {
+        "protocol": "OLLAMA",
+        "provider": "ollama_chat",
+        "base_url": "http://host.docker.internal:11434",
+        "api_key": None,
+        "extra": {},
+    }
+
+    resolved_model, call_kwargs = to_litellm(connection, "llama3.2")
+
+    assert resolved_model == "ollama_chat/llama3.2"
+    assert call_kwargs["api_base"] == "http://host.docker.internal:11434"
+
+
+def test_global_materialization_preserves_tier_and_keeps_key_server_side() -> None:
+    """Materialize two OpenRouter chat configs and check the resulting catalog.
+
+    Both configs share the same provider, API key and base URL. The test
+    expects a single connection that holds the key, one model per billing
+    tier, catalog metadata carried over from the first config, and no
+    ``sk-`` secret left in a connection once ``api_key`` is stripped.
+    """
+    free_config = {
+        "id": -101,
+        "name": "OpenRouter Free",
+        "litellm_provider": "openrouter",
+        "model_name": "meta-llama/llama-3.1-8b-instruct:free",
+        "api_key": "sk-global-secret",
+        "api_base": "https://openrouter.ai/api/v1",
+        "billing_tier": "free",
+        "anonymous_enabled": True,
+        "seo_enabled": True,
+        "rpm": 10,
+        "tpm": 1000,
+    }
+    premium_config = {
+        "id": -102,
+        "name": "OpenRouter Premium",
+        "litellm_provider": "openrouter",
+        "model_name": "anthropic/claude-sonnet-4",
+        "api_key": "sk-global-secret",
+        "api_base": "https://openrouter.ai/api/v1",
+        "billing_tier": "premium",
+    }
+
+    connections, models = materialize_global_model_catalog(
+        chat_configs=[free_config, premium_config],
+        image_configs=[],
+    )
+
+    # The key lives on the single shared connection.
+    assert len(connections) == 1
+    assert connections[0]["api_key"] == "sk-global-secret"
+
+    # Billing tiers and per-model catalog metadata survive materialization.
+    tiers = {entry["billing_tier"] for entry in models}
+    assert tiers == {"free", "premium"}
+    first_catalog = models[0]["catalog"]
+    assert first_catalog["anonymous_enabled"] is True
+    assert first_catalog["rpm"] == 10
+
+    # With ``api_key`` dropped, no other field may still contain the secret.
+    public_connections = []
+    for connection in connections:
+        public_connections.append(
+            {name: value for name, value in connection.items() if name != "api_key"}
+        )
+    assert "sk-" not in repr(public_connections)
--- a/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py
+++ b/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py
@ -217,7 +217,7 @@ def test_generate_configs_drops_non_text_and_non_tool_models():


 # ---------------------------------------------------------------------------
-# _generate_image_gen_configs / _generate_vision_llm_configs
+# _generate_image_gen_configs
 # ---------------------------------------------------------------------------


@ -263,18 +263,15 @@ def test_generate_image_gen_configs_filters_by_image_output():
    # Each config must carry ``billing_tier`` for routing in image_generation_routes.
    for c in cfgs:
        assert c["billing_tier"] in {"free", "premium"}
-        assert c["provider"] == "OPENROUTER"
+        assert c["provider"] == "openrouter"
        assert c[_OPENROUTER_DYNAMIC_MARKER] is True
-        # Defense-in-depth: emit the OpenRouter base URL at source so a
-        # downstream call site that forgets ``resolve_api_base`` still
-        # doesn't 404 against an inherited Azure endpoint.
+        # Emit the OpenRouter base URL at source so every call path passes an
+        # explicit api_base and cannot inherit a process-global endpoint.
        assert c["api_base"] == "https://openrouter.ai/api/v1"


 def test_generate_image_gen_configs_assigns_image_id_offset():
-    """Image configs use a different id_offset (-20000) so their negative
-    IDs don't collide with chat configs (-10000) or vision configs (-30000).
-    """
+    """Image configs use their own id_offset (-20000)."""
    from app.services.openrouter_integration_service import (
        _generate_image_gen_configs,
    )
@ -291,90 +288,3 @@ def test_generate_image_gen_configs_assigns_image_id_offset():
    cfgs = _generate_image_gen_configs(raw, dict(_SETTINGS_BASE))
    assert all(c["id"] < -20_000 + 1 for c in cfgs)
    assert all(c["id"] > -29_000_000 for c in cfgs)
-
-
-def test_generate_vision_llm_configs_filters_by_image_input_text_output():
-    """Vision LLMs must accept image input AND emit text — pure image-gen
-    (no text out) and text-only (no image in) models are excluded.
-    """
-    from app.services.openrouter_integration_service import (
-        _generate_vision_llm_configs,
-    )
-
-    raw = [
-        # GPT-4o: vision LLM (image in, text out) — must emit.
-        {
-            "id": "openai/gpt-4o",
-            "architecture": {
-                "input_modalities": ["text", "image"],
-                "output_modalities": ["text"],
-            },
-            "context_length": 128_000,
-            "pricing": {"prompt": "0.000005", "completion": "0.000015"},
-        },
-        # Pure image generator — image *output*, no text out. Must NOT emit.
-        {
-            "id": "openai/gpt-image-1",
-            "architecture": {
-                "input_modalities": ["text"],
-                "output_modalities": ["image"],
-            },
-            "context_length": 4_000,
-            "pricing": {"prompt": "0", "completion": "0"},
-        },
-        # Pure text model (no image in). Must NOT emit.
-        {
-            "id": "anthropic/claude-3-haiku",
-            "architecture": {
-                "input_modalities": ["text"],
-                "output_modalities": ["text"],
-            },
-            "context_length": 200_000,
-            "pricing": {"prompt": "0.000001", "completion": "0.000005"},
-        },
-    ]
-
-    cfgs = _generate_vision_llm_configs(raw, dict(_SETTINGS_BASE))
-    names = {c["model_name"] for c in cfgs}
-    assert names == {"openai/gpt-4o"}
-
-    cfg = cfgs[0]
-    assert cfg["billing_tier"] == "premium"
-    # Pricing carried inline so pricing_registration can register vision
-    # under ``openrouter/openai/gpt-4o`` even if the chat catalogue cache
-    # is cleared.
-    assert cfg["input_cost_per_token"] == pytest.approx(5e-6)
-    assert cfg["output_cost_per_token"] == pytest.approx(15e-6)
-    assert cfg[_OPENROUTER_DYNAMIC_MARKER] is True
-    # Defense-in-depth: emit the OpenRouter base URL at source so a
-    # downstream call site that forgets ``resolve_api_base`` still
-    # doesn't inherit an Azure endpoint.
-    assert cfg["api_base"] == "https://openrouter.ai/api/v1"
-
-
-def test_generate_vision_llm_configs_drops_chat_only_filters():
-    """A small-context vision model that doesn't advertise tool calling is
-    still a valid vision LLM for "describe this image" prompts. The chat
-    filters (``supports_tool_calling``, ``has_sufficient_context``) must
-    NOT be applied to vision emission.
-    """
-    from app.services.openrouter_integration_service import (
-        _generate_vision_llm_configs,
-    )
-
-    raw = [
-        {
-            "id": "tiny/vision-mini",
-            "architecture": {
-                "input_modalities": ["text", "image"],
-                "output_modalities": ["text"],
-            },
-            "supported_parameters": [],  # no tools
-            "context_length": 4_000,  # well below MIN_CONTEXT_LENGTH
-            "pricing": {"prompt": "0.0000001", "completion": "0.0000005"},
-        }
-    ]
-
-    cfgs = _generate_vision_llm_configs(raw, dict(_SETTINGS_BASE))
-    assert len(cfgs) == 1
-    assert cfgs[0]["model_name"] == "tiny/vision-mini"
--- a/surfsense_backend/tests/unit/services/test_or_health_enrichment.py
+++ b/surfsense_backend/tests/unit/services/test_or_health_enrichment.py
@ -25,7 +25,7 @@ def _or_cfg(
 ) -> dict:
    return {
        "id": cid,
-        "provider": "OPENROUTER",
+        "provider": "openrouter",
        "model_name": model_name,
        "billing_tier": tier,
        "auto_pin_tier": "B" if tier == "premium" else "C",
@ -144,7 +144,7 @@ async def test_enrich_health_only_touches_or_provider(monkeypatch):
    """YAML cfgs that aren't OPENROUTER must be skipped entirely."""
    yaml_cfg = {
        "id": -1,
-        "provider": "AZURE_OPENAI",
+        "litellm_provider": "azure",
        "model_name": "gpt-5",
        "billing_tier": "premium",
        "auto_pin_tier": "A",
@ -313,7 +313,7 @@ async def test_enrich_health_no_or_cfgs_is_noop(monkeypatch):
    """When the catalogue has no OR cfgs at all, no HTTP calls fire."""
    yaml_cfg: dict[str, Any] = {
        "id": -1,
-        "provider": "AZURE_OPENAI",
+        "litellm_provider": "azure",
        "model_name": "gpt-5",
        "billing_tier": "premium",
    }
--- a/surfsense_backend/tests/unit/services/test_pricing_registration.py
+++ b/surfsense_backend/tests/unit/services/test_pricing_registration.py
@ -186,7 +186,7 @@ def test_openrouter_models_register_under_aliases(monkeypatch):
        [
            {
                "id": 1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "anthropic/claude-3-5-sonnet",
            }
        ],
@ -228,7 +228,7 @@ def test_yaml_override_registers_under_alias_set(monkeypatch):
        [
            {
                "id": 1,
-                "provider": "AZURE_OPENAI",
+                "litellm_provider": "azure",
                "model_name": "gpt-5.4",
                "litellm_params": {
                    "base_model": "gpt-5.4",
@ -243,7 +243,6 @@ def test_yaml_override_registers_under_alias_set(monkeypatch):

    keys = spy.all_keys
    assert "gpt-5.4" in keys
-    assert "azure_openai/gpt-5.4" in keys
    assert "azure/gpt-5.4" in keys

    payload = spy.calls[0]
@ -271,7 +270,7 @@ def test_no_override_means_no_registration(monkeypatch):
        [
            {
                "id": 1,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "gpt-4o",
                "litellm_params": {"base_model": "gpt-4o"},
            }
@ -302,7 +301,7 @@ def test_openrouter_skipped_when_pricing_missing(monkeypatch):
        [
            {
                "id": 1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "anthropic/claude-3-5-sonnet",
            }
        ],
@ -349,12 +348,12 @@ def test_register_continues_after_individual_failure(monkeypatch, caplog):
        [
            {
                "id": 1,
-                "provider": "OPENROUTER",
+                "litellm_provider": "openrouter",
                "model_name": "anthropic/claude-3-5-sonnet",
            },
            {
                "id": 2,
-                "provider": "OPENAI",
+                "litellm_provider": "openai",
                "model_name": "custom-deployment",
                "litellm_params": {
                    "base_model": "custom-deployment",
@ -369,79 +368,3 @@ def test_register_continues_after_individual_failure(monkeypatch, caplog):

    # The good config still registered.
    assert any("custom-deployment" in payload for payload in successful_calls)
-
-
-def test_vision_configs_registered_with_chat_shape(monkeypatch):
-    """``register_pricing_from_global_configs`` walks
-    ``GLOBAL_VISION_LLM_CONFIGS`` in addition to the chat configs so vision
-    calls (during indexing) bill correctly. Vision configs use the same
-    chat-shape token prices, but image-gen pricing is intentionally NOT
-    registered here (handled via ``response_cost`` in LiteLLM).
-    """
-    from app.config import config
-    from app.services.pricing_registration import register_pricing_from_global_configs
-
-    spy = _patch_register(monkeypatch)
-    _patch_openrouter_pricing(
-        monkeypatch,
-        {"openai/gpt-4o": {"prompt": "0.000005", "completion": "0.000015"}},
-    )
-
-    # No chat configs — only vision. Proves the vision walk is a separate
-    # iteration, not piggy-backed on the chat list.
-    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [])
-    monkeypatch.setattr(
-        config,
-        "GLOBAL_VISION_LLM_CONFIGS",
-        [
-            {
-                "id": -1,
-                "provider": "OPENROUTER",
-                "model_name": "openai/gpt-4o",
-                "billing_tier": "premium",
-                "input_cost_per_token": 5e-6,
-                "output_cost_per_token": 15e-6,
-            }
-        ],
-    )
-
-    register_pricing_from_global_configs()
-
-    assert "openrouter/openai/gpt-4o" in spy.all_keys
-    payload_value = spy.calls[0]["openrouter/openai/gpt-4o"]
-    assert payload_value["mode"] == "chat"
-    assert payload_value["litellm_provider"] == "openrouter"
-    assert payload_value["input_cost_per_token"] == pytest.approx(5e-6)
-    assert payload_value["output_cost_per_token"] == pytest.approx(15e-6)
-
-
-def test_vision_with_inline_pricing_when_or_cache_missing(monkeypatch):
-    """If the OpenRouter pricing cache misses a vision model (different
-    catalogue surface), the vision walk falls back to inline
-    ``input_cost_per_token``/``output_cost_per_token`` on the cfg itself.
-    """
-    from app.config import config
-    from app.services.pricing_registration import register_pricing_from_global_configs
-
-    spy = _patch_register(monkeypatch)
-    _patch_openrouter_pricing(monkeypatch, {})
-
-    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [])
-    monkeypatch.setattr(
-        config,
-        "GLOBAL_VISION_LLM_CONFIGS",
-        [
-            {
-                "id": -1,
-                "provider": "OPENROUTER",
-                "model_name": "google/gemini-2.5-flash",
-                "billing_tier": "premium",
-                "input_cost_per_token": 1e-6,
-                "output_cost_per_token": 4e-6,
-            }
-        ],
-    )
-
-    register_pricing_from_global_configs()
-
-    assert "openrouter/google/gemini-2.5-flash" in spy.all_keys
--- a/surfsense_backend/tests/unit/services/test_provider_api_base.py
+++ b/surfsense_backend/tests/unit/services/test_provider_api_base.py
@ -1,107 +0,0 @@
-"""Unit tests for the shared ``api_base`` resolver.
-
-The cascade exists so vision and image-gen call sites can't silently
-inherit ``litellm.api_base`` (commonly set by ``AZURE_OPENAI_ENDPOINT``)
-when an OpenRouter / Groq / etc. config ships an empty string. See
-``provider_api_base`` module docstring for the original repro
-(OpenRouter image-gen 404-ing against an Azure endpoint).
-"""
-
-from __future__ import annotations
-
-import pytest
-
-from app.services.provider_api_base import (
-    PROVIDER_DEFAULT_API_BASE,
-    PROVIDER_KEY_DEFAULT_API_BASE,
-    resolve_api_base,
-)
-
-pytestmark = pytest.mark.unit
-
-
-def test_config_value_wins_over_defaults():
-    """A non-empty config value is always returned verbatim, even when the
-    provider has a default — the operator gets the last word."""
-    result = resolve_api_base(
-        provider="OPENROUTER",
-        provider_prefix="openrouter",
-        config_api_base="https://my-openrouter-mirror.example.com/v1",
-    )
-    assert result == "https://my-openrouter-mirror.example.com/v1"
-
-
-def test_provider_key_default_when_config_missing():
-    """``DEEPSEEK`` shares the ``openai`` LiteLLM prefix but has its own
-    base URL — the provider-key map must take precedence over the prefix
-    map so DeepSeek requests don't go to OpenAI."""
-    result = resolve_api_base(
-        provider="DEEPSEEK",
-        provider_prefix="openai",
-        config_api_base=None,
-    )
-    assert result == PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]
-
-
-def test_provider_prefix_default_when_no_key_default():
-    result = resolve_api_base(
-        provider="OPENROUTER",
-        provider_prefix="openrouter",
-        config_api_base=None,
-    )
-    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]
-
-
-def test_unknown_provider_returns_none():
-    """When neither map matches we return ``None`` so the caller can let
-    LiteLLM apply its own provider-integration default (Azure deployment
-    URL, custom-provider URL, etc.)."""
-    result = resolve_api_base(
-        provider="SOMETHING_NEW",
-        provider_prefix="something_new",
-        config_api_base=None,
-    )
-    assert result is None
-
-
-def test_empty_string_config_treated_as_missing():
-    """The original bug: OpenRouter dynamic configs ship ``api_base=""``
-    and downstream call sites use ``if cfg.get("api_base"):`` — empty
-    strings are falsy in Python but the cascade has to step in anyway."""
-    result = resolve_api_base(
-        provider="OPENROUTER",
-        provider_prefix="openrouter",
-        config_api_base="",
-    )
-    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]
-
-
-def test_whitespace_only_config_treated_as_missing():
-    """A config value of ``"   "`` is a configuration mistake — treat it
-    as missing instead of forwarding whitespace to LiteLLM (which would
-    almost certainly 404)."""
-    result = resolve_api_base(
-        provider="OPENROUTER",
-        provider_prefix="openrouter",
-        config_api_base="   ",
-    )
-    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]
-
-
-def test_provider_case_insensitive():
-    """Some call sites pass the provider lowercase (DB enum value), others
-    uppercase (YAML key). Both must resolve."""
-    upper = resolve_api_base(
-        provider="DEEPSEEK", provider_prefix="openai", config_api_base=None
-    )
-    lower = resolve_api_base(
-        provider="deepseek", provider_prefix="openai", config_api_base=None
-    )
-    assert upper == lower == PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]
-
-
-def test_all_inputs_none_returns_none():
-    assert (
-        resolve_api_base(provider=None, provider_prefix=None, config_api_base=None)
-        is None
-    )
--- a/surfsense_backend/tests/unit/services/test_provider_capabilities.py
+++ b/surfsense_backend/tests/unit/services/test_provider_capabilities.py
@ -32,7 +32,7 @@ pytestmark = pytest.mark.unit
 def test_or_modalities_with_image_returns_true():
    assert (
        derive_supports_image_input(
-            provider="OPENROUTER",
+            provider="openrouter",
            model_name="openai/gpt-4o",
            openrouter_input_modalities=["text", "image"],
        )
@ -43,7 +43,7 @@ def test_or_modalities_with_image_returns_true():
 def test_or_modalities_text_only_returns_false():
    assert (
        derive_supports_image_input(
-            provider="OPENROUTER",
+            provider="openrouter",
            model_name="deepseek/deepseek-v3.2-exp",
            openrouter_input_modalities=["text"],
        )
@ -57,7 +57,7 @@ def test_or_modalities_empty_list_returns_false():
    to LiteLLM."""
    assert (
        derive_supports_image_input(
-            provider="OPENROUTER",
+            provider="openrouter",
            model_name="weird/empty-modalities",
            openrouter_input_modalities=[],
        )
@ -70,7 +70,7 @@ def test_or_modalities_none_falls_through_to_litellm():
    to LiteLLM. Using ``openai/gpt-4o`` which is in LiteLLM's map."""
    assert (
        derive_supports_image_input(
-            provider="OPENAI",
+            provider="openai",
            model_name="gpt-4o",
            openrouter_input_modalities=None,
        )
@ -86,7 +86,7 @@ def test_or_modalities_none_falls_through_to_litellm():
 def test_litellm_known_vision_model_returns_true():
    assert (
        derive_supports_image_input(
-            provider="OPENAI",
+            provider="openai",
            model_name="gpt-4o",
        )
        is True
@ -100,7 +100,7 @@ def test_litellm_base_model_wins_over_model_name():
    doesn't know) would shadow the real capability."""
    assert (
        derive_supports_image_input(
-            provider="AZURE_OPENAI",
+            provider="azure",
            model_name="my-azure-deployment-id",
            base_model="gpt-4o",
        )
@ -112,7 +112,7 @@ def test_litellm_unknown_model_default_allows():
    """Default-allow on unknown — the safety net is the actual block."""
    assert (
        derive_supports_image_input(
-            provider="CUSTOM",
+            provider="custom",
            model_name="brand-new-model-x9-unmapped",
            custom_provider="brand_new_proxy",
        )
@ -128,7 +128,7 @@ def test_litellm_known_text_only_returns_false():
    # Sanity: confirm the helper's negative path. We use a small model
    # known not to support vision per the map.
    result = derive_supports_image_input(
-        provider="DEEPSEEK",
+        provider="openai",
        model_name="deepseek-chat",
    )
    # We accept either False (LiteLLM said explicit no) or True
@ -147,7 +147,7 @@ def test_litellm_known_text_only_returns_false():
 def test_is_known_text_only_returns_false_for_vision_model():
    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            provider="openai",
            model_name="gpt-4o",
        )
        is False
@ -160,7 +160,7 @@ def test_is_known_text_only_returns_false_for_unknown_model():
    fixing."""
    assert (
        is_known_text_only_chat_model(
-            provider="CUSTOM",
+            provider="custom",
            model_name="brand-new-model-x9-unmapped",
            custom_provider="brand_new_proxy",
        )
@ -181,7 +181,7 @@ def test_is_known_text_only_returns_false_when_lookup_raises(monkeypatch):

    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            provider="openai",
            model_name="gpt-4o",
        )
        is False
@ -201,7 +201,7 @@ def test_is_known_text_only_returns_true_on_explicit_false(monkeypatch):

    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            provider="openai",
            model_name="any-model",
        )
        is True
@ -218,7 +218,7 @@ def test_is_known_text_only_returns_false_on_supports_vision_true(monkeypatch):

    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            provider="openai",
            model_name="any-model",
        )
        is False
@ -237,7 +237,7 @@ def test_is_known_text_only_returns_false_on_missing_key(monkeypatch):

    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            provider="openai",
            model_name="any-model",
        )
        is False
--- a/surfsense_backend/tests/unit/services/test_quality_score.py
+++ b/surfsense_backend/tests/unit/services/test_quality_score.py
@ -1,4 +1,4 @@
-"""Unit tests for the Auto (Fastest) quality scoring module."""
+"""Unit tests for the Auto quality scoring module."""

 from __future__ import annotations

@ -228,7 +228,7 @@ def test_static_score_or_recent_release_beats_year_old_same_provider():

 def test_static_score_yaml_includes_operator_bonus():
    cfg = {
-        "provider": "AZURE_OPENAI",
+        "litellm_provider": "azure",
        "model_name": "gpt-5",
        "litellm_params": {"base_model": "azure/gpt-5"},
    }
@ -238,7 +238,7 @@ def test_static_score_yaml_includes_operator_bonus():

 def test_static_score_yaml_unknown_provider_still_carries_bonus():
    cfg = {
-        "provider": "SOME_NEW_PROVIDER",
+        "litellm_provider": "some_new_provider",
        "model_name": "weird-model",
    }
    score = static_score_yaml(cfg)
@ -247,7 +247,7 @@ def test_static_score_yaml_unknown_provider_still_carries_bonus():

 def test_static_score_yaml_clamped_0_to_100():
    cfg = {
-        "provider": "AZURE_OPENAI",
+        "litellm_provider": "azure",
        "model_name": "gpt-5",
        "litellm_params": {"base_model": "azure/gpt-5"},
    }
--- a/surfsense_backend/tests/unit/services/test_token_quota_service_cost.py
+++ b/surfsense_backend/tests/unit/services/test_token_quota_service_cost.py
@ -131,6 +131,10 @@ def test_serialized_calls_includes_cost_micros():
    assert serialized == [
        {
            "model": "m",
+            "model_ref": None,
+            "model_id": None,
+            "display_name": None,
+            "provider": None,
            "prompt_tokens": 1,
            "completion_tokens": 1,
            "total_tokens": 2,
--- a/surfsense_backend/tests/unit/services/test_vision_llm_api_base_defense.py
+++ b/surfsense_backend/tests/unit/services/test_vision_llm_api_base_defense.py
@ -1,89 +0,0 @@
-"""Defense-in-depth: vision-LLM resolution must not leak ``api_base``
-defaults from ``litellm.api_base`` either.
-
-Vision shares the same shape as image-gen — global YAML / OpenRouter
-dynamic configs ship ``api_base=""`` and the pre-fix ``get_vision_llm``
-call sites would silently drop the empty string and inherit
-``AZURE_OPENAI_ENDPOINT``. ``ChatLiteLLM(...)`` doesn't 404 on
-construction so we test the kwargs we hand to it instead.
-"""
-
-from __future__ import annotations
-
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-pytestmark = pytest.mark.unit
-
-
-@pytest.mark.asyncio
-async def test_get_vision_llm_global_openrouter_sets_api_base():
-    """Global negative-ID branch: an OpenRouter vision config with
-    ``api_base=""`` must end up calling ``SanitizedChatLiteLLM`` with
-    ``api_base="https://openrouter.ai/api/v1"`` — never an empty string,
-    never silently absent."""
-    from app.services import llm_service
-
-    cfg = {
-        "id": -30_001,
-        "name": "GPT-4o Vision (OpenRouter)",
-        "provider": "OPENROUTER",
-        "model_name": "openai/gpt-4o",
-        "api_key": "sk-or-test",
-        "api_base": "",
-        "api_version": None,
-        "litellm_params": {},
-        "billing_tier": "free",
-    }
-
-    search_space = MagicMock()
-    search_space.id = 1
-    search_space.user_id = "user-x"
-    search_space.vision_llm_config_id = cfg["id"]
-
-    session = AsyncMock()
-    scalars = MagicMock()
-    scalars.first.return_value = search_space
-    result = MagicMock()
-    result.scalars.return_value = scalars
-    session.execute.return_value = result
-
-    captured: dict = {}
-
-    class FakeSanitized:
-        def __init__(self, **kwargs):
-            captured.update(kwargs)
-
-    with (
-        patch(
-            "app.services.vision_llm_router_service.get_global_vision_llm_config",
-            return_value=cfg,
-        ),
-        patch(
-            "app.agents.chat.runtime.llm_config.SanitizedChatLiteLLM",
-            new=FakeSanitized,
-        ),
-    ):
-        await llm_service.get_vision_llm(session=session, search_space_id=1)
-
-    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
-    assert captured["model"] == "openrouter/openai/gpt-4o"
-
-
-def test_vision_router_deployment_sets_api_base_when_config_empty():
-    """Auto-mode vision router: deployments are fed to ``litellm.Router``,
-    so the resolver has to apply at deployment construction time too."""
-    from app.services.vision_llm_router_service import VisionLLMRouterService
-
-    deployment = VisionLLMRouterService._config_to_deployment(
-        {
-            "model_name": "openai/gpt-4o",
-            "provider": "OPENROUTER",
-            "api_key": "sk-or-test",
-            "api_base": "",
-        }
-    )
-    assert deployment is not None
-    assert deployment["litellm_params"]["api_base"] == "https://openrouter.ai/api/v1"
-    assert deployment["litellm_params"]["model"] == "openrouter/openai/gpt-4o"
--- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py
@ -0,0 +1,79 @@
+from __future__ import annotations
+
+import pytest
+
+from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception
+from app.tasks.chat.streaming.errors.classifier import classify_stream_exception
+
+pytestmark = pytest.mark.unit
+
+
+def _exception_named(name: str, message: str) -> Exception:
+    """Return an instance of a new ``Exception`` subclass named ``name``.
+
+    The class is created on the fly with ``type()`` and instantiated with
+    ``message`` as its only argument, so a test can present an exception
+    whose class name matches a provider error name.
+    """
+    return type(name, (Exception,), {})(message)
+
+
+def test_adapter_classifies_authentication_error_by_class_name() -> None:
+    """An exception class named ``AuthenticationError`` adapts to AUTH_FAILED.
+
+    The exception is a plain ``Exception`` subclass that only shares the
+    name; the adapted result must be non-retryable and carry the API-key
+    user message.
+    """
+    exc = _exception_named("AuthenticationError", "provider rejected credentials")
+
+    adapted = adapt_llm_exception(exc)
+
+    assert adapted.category is LLMErrorCategory.AUTH_FAILED
+    assert adapted.retryable is False
+    assert adapted.user_message == "LLM authentication failed. Check your API key."
+
+
+def test_adapter_classifies_embedded_provider_401_payload() -> None:
+    """A ``RuntimeError`` whose text embeds a provider 401 payload is AUTH_FAILED.
+
+    The message contains a JSON error body with ``"code":401``; the adapter
+    must also report that value as ``provider_status_code``.
+    """
+    exc = RuntimeError(
+        'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}'
+    )
+
+    adapted = adapt_llm_exception(exc)
+
+    assert adapted.category is LLMErrorCategory.AUTH_FAILED
+    assert adapted.provider_status_code == 401
+
+
+def test_adapter_preserves_rate_limit_classification() -> None:
+    """A JSON error payload with ``"code":429`` adapts to a retryable RATE_LIMITED."""
+    exc = RuntimeError('{"error":{"message":"Slow down","code":429}}')
+
+    adapted = adapt_llm_exception(exc)
+
+    assert adapted.category is LLMErrorCategory.RATE_LIMITED
+    assert adapted.retryable is True
+
+
+def test_stream_classifier_maps_model_auth_to_stable_code() -> None:
+    """A provider auth failure is classified as ``MODEL_AUTH_FAILED``.
+
+    ``classify_stream_exception`` is unpacked as a 6-tuple of
+    ``(kind, code, severity, expected, message, extra)``. For an embedded
+    401 payload the error must be an expected warning whose message mentions
+    the API key, and ``extra`` must carry the provider category and status.
+    """
+    exc = RuntimeError(
+        'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}'
+    )
+
+    kind, code, severity, expected, message, extra = classify_stream_exception(
+        exc,
+        flow_label="chat",
+    )
+
+    assert kind == "model_auth_failed"
+    assert code == "MODEL_AUTH_FAILED"
+    assert severity == "warn"
+    assert expected is True
+    assert "API key" in message
+    assert extra == {
+        "provider_error_category": "auth_failed",
+        "provider_status_code": 401,
+    }
+
+
+def test_stream_classifier_keeps_unknown_errors_generic() -> None:
+    """An error with no provider signal stays a generic ``SERVER_ERROR``.
+
+    The result must be an unexpected error-severity entry with the exact
+    message ``"Error during chat: database exploded"`` and no ``extra``
+    payload.
+    """
+    exc = RuntimeError("database exploded")
+
+    kind, code, severity, expected, message, extra = classify_stream_exception(
+        exc,
+        flow_label="chat",
+    )
+
+    assert kind == "server_error"
+    assert code == "SERVER_ERROR"
+    assert severity == "error"
+    assert expected is False
+    assert message == "Error during chat: database exploded"
+    assert extra is None
--- a/surfsense_backend/tests/unit/tasks/chat/test_llm_history_normalizer.py
+++ b/surfsense_backend/tests/unit/tasks/chat/test_llm_history_normalizer.py
@ -0,0 +1,61 @@
+"""Unit tests for provider-safe LLM history normalization."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.tasks.chat.llm_history_normalizer import (
+    assistant_content_to_llm_text,
+    user_content_to_llm_content,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def test_assistant_ui_parts_drop_thinking_steps_for_llm_history() -> None:
+    """A ``data-thinking-steps`` part is omitted; only the text part remains."""
+    content = [
+        {"type": "data-thinking-steps", "data": [{"id": "thinking-1"}]},
+        {"type": "text", "text": "visible answer"},
+    ]
+
+    assert assistant_content_to_llm_text(content) == "visible answer"
+
+
+def test_provider_thinking_blocks_are_not_replayed_to_llm() -> None:
+    """A ``thinking`` block is left out of the text produced for LLM history."""
+    content = [
+        {"type": "thinking", "thinking": "private reasoning"},
+        {"type": "text", "text": "final answer"},
+    ]
+
+    assert assistant_content_to_llm_text(content) == "final answer"
+
+
+def test_unknown_assistant_blocks_are_dropped() -> None:
+    """``redacted_thinking`` and ``tool_use`` blocks are dropped; text is kept."""
+    content = [
+        {"type": "redacted_thinking", "data": "hidden"},
+        {"type": "tool_use", "name": "search"},
+        {"type": "text", "text": "kept"},
+    ]
+
+    assert assistant_content_to_llm_text(content) == "kept"
+
+
+def test_user_images_convert_to_openai_compatible_image_url_blocks() -> None:
+    """With ``allow_images=True`` an ``image`` part becomes an ``image_url`` block.
+
+    The text part passes through unchanged and the image data URL is moved
+    under ``image_url.url``.
+    """
+    content = [
+        {"type": "text", "text": "look"},
+        {"type": "image", "image": "data:image/png;base64,abc"},
+    ]
+
+    assert user_content_to_llm_content(content, allow_images=True) == [
+        {"type": "text", "text": "look"},
+        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
+    ]
+
+
+def test_user_images_can_be_dropped_for_text_only_history() -> None:
+    """With ``allow_images=False`` the image is dropped and a plain string returned."""
+    content = [
+        {"type": "text", "text": "look"},
+        {"type": "image", "image": "data:image/png;base64,abc"},
+    ]
+
+    assert user_content_to_llm_content(content, allow_images=False) == "look"
--- a/surfsense_backend/tests/unit/tasks/chat/test_message_parts_normalizer.py
+++ b/surfsense_backend/tests/unit/tasks/chat/test_message_parts_normalizer.py
@ -0,0 +1,67 @@
+"""Unit tests for final assistant message part normalization."""
+
+from __future__ import annotations
+
+import pytest
+from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
+
+from app.tasks.chat.message_parts_normalizer import (
+    final_assistant_parts_from_messages,
+    merge_streamed_and_final_parts,
+    normalize_ai_message_to_parts,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def test_string_ai_message_content_becomes_text_part() -> None:
+    """Plain string ``AIMessage`` content normalizes to a single text part."""
+    assert normalize_ai_message_to_parts(AIMessage(content="hello")) == [
+        {"type": "text", "text": "hello"}
+    ]
+
+
+def test_deepseek_thinking_plus_text_blocks_backfill_only_text() -> None:
+    """Only the text block survives when content mixes thinking and text.
+
+    The message also carries ``reasoning_content`` in ``additional_kwargs``;
+    neither it nor the ``thinking`` block may appear in the normalized parts.
+    """
+    message = AIMessage(
+        content=[
+            {"type": "thinking", "thinking": "hidden reasoning"},
+            {"type": "text", "text": "Yo bro! What's up?"},
+        ],
+        additional_kwargs={"reasoning_content": "hidden reasoning"},
+    )
+
+    assert normalize_ai_message_to_parts(message) == [
+        {"type": "text", "text": "Yo bro! What's up?"}
+    ]
+
+
+def test_final_parts_use_last_ai_message_and_skip_trailing_tool_messages() -> None:
+    """Final parts come from the last ``AIMessage``, not the last message.
+
+    The earlier "draft" ``AIMessage`` and the ``ToolMessage`` that trails the
+    final answer must not contribute to the result.
+    """
+    messages = [
+        HumanMessage(content="ask"),
+        AIMessage(content="draft"),
+        ToolMessage(content="tool output", tool_call_id="tc-1"),
+        AIMessage(content=[{"type": "text", "text": "final answer"}]),
+        ToolMessage(content="trailing tool noise", tool_call_id="tc-2"),
+    ]
+
+    assert final_assistant_parts_from_messages(messages) == [
+        {"type": "text", "text": "final answer"}
+    ]
+
+
+def test_merge_adds_final_text_when_stream_only_has_thinking_steps() -> None:
+    """Final text is appended when the stream holds only thinking steps.
+
+    The merged list is the streamed parts followed by the final parts.
+    """
+    streamed = [
+        {
+            "type": "data-thinking-steps",
+            "data": [{"id": "thinking-1", "status": "completed"}],
+        }
+    ]
+    final = [{"type": "text", "text": "visible answer"}]
+
+    assert merge_streamed_and_final_parts(streamed, final) == [*streamed, *final]
+
+
+def test_merge_does_not_duplicate_when_stream_already_has_text() -> None:
+    """When the stream already has a text part, the streamed parts win as-is."""
+    streamed = [{"type": "text", "text": "streamed answer"}]
+    final = [{"type": "text", "text": "final answer"}]
+
+    assert merge_streamed_and_final_parts(streamed, final) == streamed
--- a/surfsense_backend/tests/unit/tasks/test_stream_new_chat_image_safety_net.py
+++ b/surfsense_backend/tests/unit/tasks/test_stream_new_chat_image_safety_net.py
@ -35,7 +35,7 @@ def test_safety_net_does_not_fire_for_azure_gpt_4o():
    it text-only."""
    assert (
        is_known_text_only_chat_model(
-            provider="AZURE_OPENAI",
+            provider="azure",
            model_name="my-azure-deployment",
            base_model="gpt-4o",
        )
@ -49,7 +49,7 @@ def test_safety_net_does_not_fire_for_unknown_model():
    LiteLLM doesn't know about must flow through to the provider."""
    assert (
        is_known_text_only_chat_model(
-            provider="CUSTOM",
+            provider="custom",
            custom_provider="brand_new_proxy",
            model_name="brand-new-model-x9",
        )
@ -69,7 +69,7 @@ def test_safety_net_does_not_fire_when_lookup_raises(monkeypatch):

    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            provider="openai",
            model_name="gpt-4o",
        )
        is False
@ -88,7 +88,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
    monkeypatch.setattr(pc.litellm, "get_model_info", _info_explicit_false)
    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            provider="openai",
            model_name="text-only-stub",
        )
        is True
@ -100,7 +100,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
    monkeypatch.setattr(pc.litellm, "get_model_info", _info_true)
    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            provider="openai",
            model_name="vision-stub",
        )
        is False
@ -112,7 +112,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
    monkeypatch.setattr(pc.litellm, "get_model_info", _info_missing)
    assert (
        is_known_text_only_chat_model(
-            provider="OPENAI",
+            provider="openai",
            model_name="missing-key-stub",
        )
        is False