feat(database-migrations): add migration to remove legacy model config tables and remove stale model connection code

2026-06-22 21:28:12 +02:00 · 2026-06-13 12:45:43 +05:30 · 2026-06-13 12:45:43 +05:30 · bd4a04f2e7
commit bd4a04f2e7
parent 50668775f8
93 changed files with 956 additions and 11442 deletions
--- a/surfsense_backend/tests/unit/services/test_agent_billing_resolver.py
+++ b/surfsense_backend/tests/unit/services/test_agent_billing_resolver.py
@ -1,27 +1,4 @@
-"""Unit tests for ``_resolve_agent_billing_for_search_space``.
-
-Validates the resolver used by Celery podcast/video tasks to compute
-``(owner_user_id, billing_tier, base_model)`` from a search space and its
-agent LLM config. The resolver mirrors chat's billing-resolution pattern at
-``stream_new_chat.py:2294-2351`` and is the single integration point that
-prevents Auto-mode podcast/video from leaking premium credit.
-
-Coverage:
-
-* Auto mode + ``thread_id`` set, pin resolves to a negative-id premium
-  global → returns ``("premium", <base_model>)``.
-* Auto mode + ``thread_id`` set, pin resolves to a negative-id free
-  global → returns ``("free", <base_model>)``.
-* Auto mode + ``thread_id`` set, pin resolves to a positive-id BYOK config
-  → always ``"free"``.
-* Auto mode + ``thread_id=None`` → fallback to ``("free", "auto")`` without
-  hitting the pin service.
-* Negative id (no Auto) → uses ``get_global_llm_config``'s
-  ``billing_tier``.
-* Positive id (user BYOK) → always ``"free"``.
-* Search space not found → raises ``ValueError``.
-* ``agent_llm_id`` is None → raises ``ValueError``.
-"""
+"""Unit tests for ``_resolve_agent_billing_for_search_space``."""

 from __future__ import annotations

@ -34,11 +11,6 @@ import pytest
 pytestmark = pytest.mark.unit


-# ---------------------------------------------------------------------------
-# Fakes
-# ---------------------------------------------------------------------------
-
-
 class _FakeExecResult:
    def __init__(self, obj):
        self._obj = obj
@ -51,14 +23,6 @@ class _FakeExecResult:


 class _FakeSession:
-    """Tiny AsyncSession stub.
-
-    ``responses`` is a list of objects to return from successive
-    ``execute()`` calls (in order). The resolver makes at most two
-    ``execute()`` calls (search-space lookup, then optionally NewLLMConfig
-    lookup), so two queued responses cover the matrix.
-    """
-
    def __init__(self, responses: list):
        self._responses = list(responses)

@ -67,9 +31,6 @@ class _FakeSession:
            return _FakeExecResult(None)
        return _FakeExecResult(self._responses.pop(0))

-    async def commit(self) -> None:
-        pass
-

@dataclass
 class _FakePinResolution:
@ -78,53 +39,33 @@ class _FakePinResolution:
    from_existing_pin: bool = False


-def _make_search_space(*, agent_llm_id: int | None, user_id: UUID) -> SimpleNamespace:
-    return SimpleNamespace(
-        id=42,
-        agent_llm_id=agent_llm_id,
-        user_id=user_id,
-    )
+def _make_search_space(*, chat_model_id: int | None, user_id: UUID) -> SimpleNamespace:
+    return SimpleNamespace(id=42, chat_model_id=chat_model_id, user_id=user_id)


-def _make_byok_config(
-    *, id_: int, base_model: str | None = None, model_name: str = "gpt-byok"
+def _make_byok_model(
+    *, id_: int, base_model: str | None = None, model_id: str = "gpt-byok"
 ) -> SimpleNamespace:
    return SimpleNamespace(
        id=id_,
-        model_name=model_name,
-        litellm_params={"base_model": base_model} if base_model else {},
+        model_id=model_id,
+        catalog={"base_model": base_model} if base_model else {},
+        connection=SimpleNamespace(enabled=True, search_space_id=42, user_id=None),
    )


-# ---------------------------------------------------------------------------
-# Tests
-# ---------------------------------------------------------------------------
-
-
@pytest.mark.asyncio
 async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
-    """Auto + thread → pin service resolves to negative-id premium config →
-    resolver returns ``("premium", <base_model>)``."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])

-    # Mock the pin service to return a concrete premium config id.
-    async def _fake_resolve_pin(
-        sess,
-        *,
-        thread_id,
-        search_space_id,
-        user_id,
-        selected_llm_config_id,
-        force_repin_free=False,
-    ):
-        assert selected_llm_config_id == 0
-        assert thread_id == 99
+    async def _fake_resolve_pin(*_args, **kwargs):
+        assert kwargs["selected_llm_config_id"] == 0
+        assert kwargs["thread_id"] == 99
        return _FakePinResolution(resolved_llm_config_id=-1, resolved_tier="premium")

-    # Mock global config lookup to return a premium entry.
    def _fake_get_global(cfg_id):
        if cfg_id == -1:
            return {
@ -135,8 +76,6 @@ async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
            }
        return None

-    # Lazy imports inside the resolver — patch the *target* modules so the
-    # imported names resolve to our fakes.
    import app.services.auto_model_pin_service as pin_module
    import app.services.llm_service as llm_module

@ -154,77 +93,18 @@ async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
    assert base_model == "gpt-5.4"


-@pytest.mark.asyncio
-async def test_auto_mode_with_thread_id_resolves_to_free_global(monkeypatch):
-    """Auto + thread → pin returns negative-id free config → resolver
-    returns ``("free", <base_model>)``. Same path the pin service takes for
-    out-of-credit users (graceful degradation)."""
-    from app.services.billable_calls import _resolve_agent_billing_for_search_space
-
-    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
-
-    async def _fake_resolve_pin(
-        sess,
-        *,
-        thread_id,
-        search_space_id,
-        user_id,
-        selected_llm_config_id,
-        force_repin_free=False,
-    ):
-        return _FakePinResolution(resolved_llm_config_id=-3, resolved_tier="free")
-
-    def _fake_get_global(cfg_id):
-        if cfg_id == -3:
-            return {
-                "id": -3,
-                "model_name": "openrouter/free-model",
-                "billing_tier": "free",
-                "litellm_params": {"base_model": "openrouter/free-model"},
-            }
-        return None
-
-    import app.services.auto_model_pin_service as pin_module
-    import app.services.llm_service as llm_module
-
-    monkeypatch.setattr(
-        pin_module, "resolve_or_get_pinned_llm_config_id", _fake_resolve_pin
-    )
-    monkeypatch.setattr(llm_module, "get_global_llm_config", _fake_get_global)
-
-    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
-        session, search_space_id=42, thread_id=99
-    )
-
-    assert owner == user_id
-    assert tier == "free"
-    assert base_model == "openrouter/free-model"
-
-
@pytest.mark.asyncio
 async def test_auto_mode_with_thread_id_resolves_to_byok_is_free(monkeypatch):
-    """Auto + thread → pin returns positive-id BYOK config → resolver
-    returns ``("free", ...)`` (BYOK is always free per
-    ``AgentConfig.from_new_llm_config``)."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    search_space = _make_search_space(agent_llm_id=0, user_id=user_id)
-    byok_cfg = _make_byok_config(
-        id_=17, base_model="anthropic/claude-3-haiku", model_name="my-claude"
+    search_space = _make_search_space(chat_model_id=0, user_id=user_id)
+    byok_model = _make_byok_model(
+        id_=17, base_model="anthropic/claude-3-haiku", model_id="my-claude"
    )
-    session = _FakeSession([search_space, byok_cfg])
+    session = _FakeSession([search_space, byok_model])

-    async def _fake_resolve_pin(
-        sess,
-        *,
-        thread_id,
-        search_space_id,
-        user_id,
-        selected_llm_config_id,
-        force_repin_free=False,
-    ):
+    async def _fake_resolve_pin(*_args, **_kwargs):
        return _FakePinResolution(resolved_llm_config_id=17, resolved_tier="free")

    import app.services.auto_model_pin_service as pin_module
@ -244,13 +124,10 @@ async def test_auto_mode_with_thread_id_resolves_to_byok_is_free(monkeypatch):

@pytest.mark.asyncio
 async def test_auto_mode_without_thread_id_falls_back_to_free():
-    """Auto + ``thread_id=None`` → ``("free", "auto")`` without invoking
-    the pin service. Forward-compat fallback for any future direct-API
-    entrypoint that doesn't have a chat thread."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])

    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
        session, search_space_id=42, thread_id=None
@ -263,13 +140,10 @@ async def test_auto_mode_without_thread_id_falls_back_to_free():

@pytest.mark.asyncio
 async def test_auto_mode_pin_failure_falls_back_to_free(monkeypatch):
-    """If the pin service raises ``ValueError`` (thread missing /
-    mismatched search space), the resolver should log and return free
-    rather than killing the whole task."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])

    async def _fake_resolve_pin(*args, **kwargs):
        raise ValueError("thread missing")
@ -291,12 +165,10 @@ async def test_auto_mode_pin_failure_falls_back_to_free(monkeypatch):

@pytest.mark.asyncio
 async def test_negative_id_premium_global_returns_premium(monkeypatch):
-    """Explicit negative agent_llm_id → ``get_global_llm_config`` →
-    return its ``billing_tier``."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=-1, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=-1, user_id=user_id)])

    def _fake_get_global(cfg_id):
        return {
@ -319,50 +191,15 @@ async def test_negative_id_premium_global_returns_premium(monkeypatch):
    assert base_model == "gpt-5.4"


-@pytest.mark.asyncio
-async def test_negative_id_free_global_returns_free(monkeypatch):
-    from app.services.billable_calls import _resolve_agent_billing_for_search_space
-
-    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=-2, user_id=user_id)])
-
-    def _fake_get_global(cfg_id):
-        return {
-            "id": cfg_id,
-            "model_name": "openrouter/some-free",
-            "billing_tier": "free",
-            "litellm_params": {"base_model": "openrouter/some-free"},
-        }
-
-    import app.services.llm_service as llm_module
-
-    monkeypatch.setattr(llm_module, "get_global_llm_config", _fake_get_global)
-
-    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
-        session, search_space_id=42, thread_id=None
-    )
-
-    assert owner == user_id
-    assert tier == "free"
-    assert base_model == "openrouter/some-free"
-
-
@pytest.mark.asyncio
 async def test_negative_id_missing_base_model_falls_back_to_model_name(monkeypatch):
-    """When the global config has no ``litellm_params.base_model``, the
-    resolver falls back to ``model_name`` — matching chat's behavior."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=-5, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=-5, user_id=user_id)])

    def _fake_get_global(cfg_id):
-        return {
-            "id": cfg_id,
-            "model_name": "fallback-model",
-            "billing_tier": "premium",
-            # No litellm_params.
-        }
+        return {"id": cfg_id, "model_name": "fallback-model", "billing_tier": "premium"}

    import app.services.llm_service as llm_module

@ -378,14 +215,12 @@ async def test_negative_id_missing_base_model_falls_back_to_model_name(monkeypat

@pytest.mark.asyncio
 async def test_positive_id_byok_is_always_free():
-    """Positive agent_llm_id → user-owned BYOK NewLLMConfig → always free,
-    regardless of underlying provider tier."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    search_space = _make_search_space(agent_llm_id=23, user_id=user_id)
-    byok_cfg = _make_byok_config(id_=23, base_model="anthropic/claude-3.5-sonnet")
-    session = _FakeSession([search_space, byok_cfg])
+    search_space = _make_search_space(chat_model_id=23, user_id=user_id)
+    byok_model = _make_byok_model(id_=23, base_model="anthropic/claude-3.5-sonnet")
+    session = _FakeSession([search_space, byok_model])

    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
        session, search_space_id=42
@ -398,13 +233,10 @@ async def test_positive_id_byok_is_always_free():

@pytest.mark.asyncio
 async def test_positive_id_byok_missing_returns_free_with_empty_base_model():
-    """If the BYOK config row is missing/deleted but the search space still
-    points at it, the resolver still returns free (no debit) with an empty
-    base_model — billable_call's premium path is skipped, no harm done."""
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=99, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=99, user_id=user_id)])

    owner, tier, base_model = await _resolve_agent_billing_for_search_space(
        session, search_space_id=42
@ -419,18 +251,18 @@ async def test_positive_id_byok_missing_returns_free_with_empty_base_model():
 async def test_search_space_not_found_raises_value_error():
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

-    session = _FakeSession([None])
-
    with pytest.raises(ValueError, match="Search space"):
-        await _resolve_agent_billing_for_search_space(session, search_space_id=999)
+        await _resolve_agent_billing_for_search_space(
+            _FakeSession([None]), search_space_id=999
+        )


@pytest.mark.asyncio
-async def test_agent_llm_id_none_raises_value_error():
+async def test_chat_model_id_none_raises_value_error():
    from app.services.billable_calls import _resolve_agent_billing_for_search_space

    user_id = uuid4()
-    session = _FakeSession([_make_search_space(agent_llm_id=None, user_id=user_id)])
+    session = _FakeSession([_make_search_space(chat_model_id=None, user_id=user_id)])

-    with pytest.raises(ValueError, match="agent_llm_id"):
+    with pytest.raises(ValueError, match="chat_model_id"):
        await _resolve_agent_billing_for_search_space(session, search_space_id=42)
--- a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
+++ b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
@ -32,8 +32,9 @@ class _FakeQuotaResult:


 class _FakeExecResult:
-    def __init__(self, thread):
+    def __init__(self, *, thread=None, scalars=None):
        self._thread = thread
+        self._scalars = scalars or []

    def unique(self):
        return self
@ -41,19 +42,69 @@ class _FakeExecResult:
    def scalar_one_or_none(self):
        return self._thread

+    def scalars(self):
+        return SimpleNamespace(all=lambda: self._scalars)
+

 class _FakeSession:
-    def __init__(self, thread):
+    def __init__(self, thread, *, models=None):
        self.thread = thread
+        self.models = models or []
        self.commit_count = 0
+        self.execute_count = 0

    async def execute(self, _stmt):
-        return _FakeExecResult(self.thread)
+        self.execute_count += 1
+        if self.execute_count == 1:
+            return _FakeExecResult(thread=self.thread)
+        return _FakeExecResult(scalars=self.models)

    async def commit(self):
        self.commit_count += 1


+def _set_global_llm_configs(monkeypatch, config, configs: list[dict]):
+    """Patch legacy cfgs plus the global connections/models derived from them."""
+    connections = []
+    models = []
+    for cfg in configs:
+        config_id = int(cfg["id"])
+        connection_id = config_id - 100_000
+        provider = cfg.get("provider") or cfg.get("litellm_provider")
+        model_name = cfg["model_name"]
+        connections.append(
+            {
+                "id": connection_id,
+                "provider": provider,
+                "scope": "GLOBAL",
+                "enabled": True,
+            }
+        )
+        models.append(
+            {
+                "id": config_id,
+                "connection_id": connection_id,
+                "model_id": model_name,
+                "display_name": cfg.get("name") or model_name,
+                "supports_chat": cfg.get("supports_chat", True),
+                "supports_image_input": cfg.get("supports_image_input", True),
+                "supports_tools": cfg.get("supports_tools", True),
+                "supports_image_generation": cfg.get("supports_image_generation", False),
+                "capabilities_override": cfg.get("capabilities_override") or {},
+                "billing_tier": cfg.get("billing_tier", "free"),
+                "catalog": {
+                    "auto_pin_tier": cfg.get("auto_pin_tier"),
+                    "quality_score": cfg.get("quality_score")
+                    or cfg.get("quality_score_static"),
+                },
+            }
+        )
+
+    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", configs)
+    monkeypatch.setattr(config, "GLOBAL_CONNECTIONS", connections)
+    monkeypatch.setattr(config, "GLOBAL_MODELS", models)
+
+
 def _thread(
    *,
    search_space_id: int = 10,
@ -71,9 +122,9 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
            {
@ -111,9 +162,9 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -2,
@ -158,9 +209,9 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -216,9 +267,9 @@ async def test_next_turn_reuses_existing_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -257,9 +308,9 @@ async def test_premium_eligible_auto_can_pin_premium(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -295,9 +346,9 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -2,
@ -340,9 +391,9 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -2,
@ -385,9 +436,9 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -2,
@ -433,9 +484,9 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-2))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
        ],
@ -458,9 +509,9 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-999))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
        ],
@ -487,7 +538,7 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):


 # ---------------------------------------------------------------------------
-# Quality-aware pin selection (Auto Fastest upgrade)
+# Quality-aware pin selection (Auto upgrade)
 # ---------------------------------------------------------------------------


@ -498,9 +549,9 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -550,9 +601,9 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -602,9 +653,9 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
    from app.config import config

    session = _FakeSession(_thread())
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -676,9 +727,9 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
        "quality_score": 10,
        "health_gated": False,
    }
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [*high_score_cfgs, low_score_trap],
    )

@ -723,9 +774,9 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -775,9 +826,9 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -833,9 +884,9 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -886,9 +937,9 @@ async def test_clearing_runtime_cooldown_restores_pin_reuse(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
@ -931,9 +982,9 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
    from app.config import config

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
-    monkeypatch.setattr(
+    _set_global_llm_configs(
+        monkeypatch,
        config,
-        "GLOBAL_LLM_CONFIGS",
        [
            {
                "id": -1,
--- a/surfsense_backend/tests/unit/services/test_image_gen_api_base_defense.py
+++ b/surfsense_backend/tests/unit/services/test_image_gen_api_base_defense.py
@ -15,15 +15,19 @@ async def test_global_openrouter_image_gen_sets_explicit_api_base():
    """The global-config branch forwards the explicit OpenRouter base."""
    from app.routes import image_generation_routes

-    cfg = {
+    global_model = {
        "id": -20_001,
-        "name": "GPT Image 1 (OpenRouter)",
-        "litellm_provider": "openrouter",
-        "model_name": "openai/gpt-image-1",
+        "connection_id": -101,
+        "model_id": "openai/gpt-image-1",
+        "supports_image_generation": True,
+        "capabilities_override": {},
+    }
+    global_connection = {
+        "id": -101,
+        "provider": "openrouter",
        "api_key": "sk-or-test",
-        "api_base": "https://openrouter.ai/api/v1",
-        "api_version": None,
-        "litellm_params": {},
+        "base_url": "https://openrouter.ai/api/v1",
+        "extra": {},
    }

    captured: dict = {}
@ -33,7 +37,7 @@ async def test_global_openrouter_image_gen_sets_explicit_api_base():
        return MagicMock(model_dump=lambda: {"data": []}, _hidden_params={})

    image_gen = MagicMock()
-    image_gen.image_generation_config_id = cfg["id"]
+    image_gen.image_gen_model_id = global_model["id"]
    image_gen.prompt = "test"
    image_gen.n = 1
    image_gen.quality = None
@ -43,14 +47,19 @@ async def test_global_openrouter_image_gen_sets_explicit_api_base():
    image_gen.model = None

    search_space = MagicMock()
-    search_space.image_generation_config_id = cfg["id"]
+    search_space.image_gen_model_id = global_model["id"]
    session = MagicMock()

    with (
        patch.object(
            image_generation_routes,
-            "_get_global_image_gen_config",
-            return_value=cfg,
+            "_get_global_model",
+            return_value=global_model,
+        ),
+        patch.object(
+            image_generation_routes,
+            "_get_global_connection",
+            return_value=global_connection,
        ),
        patch.object(
            image_generation_routes,
@ -74,15 +83,19 @@ async def test_generate_image_tool_global_sets_explicit_api_base():
        generate_image as gi_module,
    )

-    cfg = {
+    global_model = {
        "id": -20_001,
-        "name": "GPT Image 1 (OpenRouter)",
-        "litellm_provider": "openrouter",
-        "model_name": "openai/gpt-image-1",
+        "connection_id": -101,
+        "model_id": "openai/gpt-image-1",
+        "supports_image_generation": True,
+        "capabilities_override": {},
+    }
+    global_connection = {
+        "id": -101,
+        "provider": "openrouter",
        "api_key": "sk-or-test",
-        "api_base": "https://openrouter.ai/api/v1",
-        "api_version": None,
-        "litellm_params": {},
+        "base_url": "https://openrouter.ai/api/v1",
+        "extra": {},
    }

    captured: dict = {}
@ -98,7 +111,7 @@ async def test_generate_image_tool_global_sets_explicit_api_base():

    search_space = MagicMock()
    search_space.id = 1
-    search_space.image_generation_config_id = cfg["id"]
+    search_space.image_gen_model_id = global_model["id"]

    session_cm = AsyncMock()
    session = AsyncMock()
@ -121,7 +134,8 @@ async def test_generate_image_tool_global_sets_explicit_api_base():

    with (
        patch.object(gi_module, "shielded_async_session", return_value=session_cm),
-        patch.object(gi_module, "_get_global_image_gen_config", return_value=cfg),
+        patch.object(gi_module, "_get_global_model", return_value=global_model),
+        patch.object(gi_module, "_get_global_connection", return_value=global_connection),
        patch.object(
            gi_module, "aimage_generation", side_effect=fake_aimage_generation
        ),
--- a/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py
+++ b/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py
@ -217,7 +217,7 @@ def test_generate_configs_drops_non_text_and_non_tool_models():


 # ---------------------------------------------------------------------------
-# _generate_image_gen_configs / _generate_vision_llm_configs
+# _generate_image_gen_configs
 # ---------------------------------------------------------------------------


@ -263,7 +263,7 @@ def test_generate_image_gen_configs_filters_by_image_output():
    # Each config must carry ``billing_tier`` for routing in image_generation_routes.
    for c in cfgs:
        assert c["billing_tier"] in {"free", "premium"}
-        assert c["litellm_provider"] == "openrouter"
+        assert c["provider"] == "openrouter"
        assert c[_OPENROUTER_DYNAMIC_MARKER] is True
        # Emit the OpenRouter base URL at source so every call path passes an
        # explicit api_base and cannot inherit a process-global endpoint.
@ -271,9 +271,7 @@ def test_generate_image_gen_configs_filters_by_image_output():


 def test_generate_image_gen_configs_assigns_image_id_offset():
-    """Image configs use a different id_offset (-20000) so their negative
-    IDs don't collide with chat configs (-10000) or vision configs (-30000).
-    """
+    """Image configs use their own id_offset (-20000)."""
    from app.services.openrouter_integration_service import (
        _generate_image_gen_configs,
    )
@ -291,88 +289,3 @@ def test_generate_image_gen_configs_assigns_image_id_offset():
    assert all(c["id"] < -20_000 + 1 for c in cfgs)
    assert all(c["id"] > -29_000_000 for c in cfgs)

-
-def test_generate_vision_llm_configs_filters_by_image_input_text_output():
-    """Vision LLMs must accept image input AND emit text — pure image-gen
-    (no text out) and text-only (no image in) models are excluded.
-    """
-    from app.services.openrouter_integration_service import (
-        _generate_vision_llm_configs,
-    )
-
-    raw = [
-        # GPT-4o: vision LLM (image in, text out) — must emit.
-        {
-            "id": "openai/gpt-4o",
-            "architecture": {
-                "input_modalities": ["text", "image"],
-                "output_modalities": ["text"],
-            },
-            "context_length": 128_000,
-            "pricing": {"prompt": "0.000005", "completion": "0.000015"},
-        },
-        # Pure image generator — image *output*, no text out. Must NOT emit.
-        {
-            "id": "openai/gpt-image-1",
-            "architecture": {
-                "input_modalities": ["text"],
-                "output_modalities": ["image"],
-            },
-            "context_length": 4_000,
-            "pricing": {"prompt": "0", "completion": "0"},
-        },
-        # Pure text model (no image in). Must NOT emit.
-        {
-            "id": "anthropic/claude-3-haiku",
-            "architecture": {
-                "input_modalities": ["text"],
-                "output_modalities": ["text"],
-            },
-            "context_length": 200_000,
-            "pricing": {"prompt": "0.000001", "completion": "0.000005"},
-        },
-    ]
-
-    cfgs = _generate_vision_llm_configs(raw, dict(_SETTINGS_BASE))
-    names = {c["model_name"] for c in cfgs}
-    assert names == {"openai/gpt-4o"}
-
-    cfg = cfgs[0]
-    assert cfg["billing_tier"] == "premium"
-    # Pricing carried inline so pricing_registration can register vision
-    # under ``openrouter/openai/gpt-4o`` even if the chat catalogue cache
-    # is cleared.
-    assert cfg["input_cost_per_token"] == pytest.approx(5e-6)
-    assert cfg["output_cost_per_token"] == pytest.approx(15e-6)
-    assert cfg[_OPENROUTER_DYNAMIC_MARKER] is True
-    # Emit the OpenRouter base URL at source so every call path passes an
-    # explicit api_base and cannot inherit a process-global endpoint.
-    assert cfg["api_base"] == "https://openrouter.ai/api/v1"
-
-
-def test_generate_vision_llm_configs_drops_chat_only_filters():
-    """A small-context vision model that doesn't advertise tool calling is
-    still a valid vision LLM for "describe this image" prompts. The chat
-    filters (``supports_tool_calling``, ``has_sufficient_context``) must
-    NOT be applied to vision emission.
-    """
-    from app.services.openrouter_integration_service import (
-        _generate_vision_llm_configs,
-    )
-
-    raw = [
-        {
-            "id": "tiny/vision-mini",
-            "architecture": {
-                "input_modalities": ["text", "image"],
-                "output_modalities": ["text"],
-            },
-            "supported_parameters": [],  # no tools
-            "context_length": 4_000,  # well below MIN_CONTEXT_LENGTH
-            "pricing": {"prompt": "0.0000001", "completion": "0.0000005"},
-        }
-    ]
-
-    cfgs = _generate_vision_llm_configs(raw, dict(_SETTINGS_BASE))
-    assert len(cfgs) == 1
-    assert cfgs[0]["model_name"] == "tiny/vision-mini"
--- a/surfsense_backend/tests/unit/services/test_pricing_registration.py
+++ b/surfsense_backend/tests/unit/services/test_pricing_registration.py
@ -370,77 +370,3 @@ def test_register_continues_after_individual_failure(monkeypatch, caplog):
    assert any("custom-deployment" in payload for payload in successful_calls)


-def test_vision_configs_registered_with_chat_shape(monkeypatch):
-    """``register_pricing_from_global_configs`` walks
-    ``GLOBAL_VISION_LLM_CONFIGS`` in addition to the chat configs so vision
-    calls (during indexing) bill correctly. Vision configs use the same
-    chat-shape token prices, but image-gen pricing is intentionally NOT
-    registered here (handled via ``response_cost`` in LiteLLM).
-    """
-    from app.config import config
-    from app.services.pricing_registration import register_pricing_from_global_configs
-
-    spy = _patch_register(monkeypatch)
-    _patch_openrouter_pricing(
-        monkeypatch,
-        {"openai/gpt-4o": {"prompt": "0.000005", "completion": "0.000015"}},
-    )
-
-    # No chat configs — only vision. Proves the vision walk is a separate
-    # iteration, not piggy-backed on the chat list.
-    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [])
-    monkeypatch.setattr(
-        config,
-        "GLOBAL_VISION_LLM_CONFIGS",
-        [
-            {
-                "id": -1,
-                "litellm_provider": "openrouter",
-                "model_name": "openai/gpt-4o",
-                "billing_tier": "premium",
-                "input_cost_per_token": 5e-6,
-                "output_cost_per_token": 15e-6,
-            }
-        ],
-    )
-
-    register_pricing_from_global_configs()
-
-    assert "openrouter/openai/gpt-4o" in spy.all_keys
-    payload_value = spy.calls[0]["openrouter/openai/gpt-4o"]
-    assert payload_value["mode"] == "chat"
-    assert payload_value["litellm_provider"] == "openrouter"
-    assert payload_value["input_cost_per_token"] == pytest.approx(5e-6)
-    assert payload_value["output_cost_per_token"] == pytest.approx(15e-6)
-
-
-def test_vision_with_inline_pricing_when_or_cache_missing(monkeypatch):
-    """If the OpenRouter pricing cache misses a vision model (different
-    catalogue surface), the vision walk falls back to inline
-    ``input_cost_per_token``/``output_cost_per_token`` on the cfg itself.
-    """
-    from app.config import config
-    from app.services.pricing_registration import register_pricing_from_global_configs
-
-    spy = _patch_register(monkeypatch)
-    _patch_openrouter_pricing(monkeypatch, {})
-
-    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [])
-    monkeypatch.setattr(
-        config,
-        "GLOBAL_VISION_LLM_CONFIGS",
-        [
-            {
-                "id": -1,
-                "litellm_provider": "openrouter",
-                "model_name": "google/gemini-2.5-flash",
-                "billing_tier": "premium",
-                "input_cost_per_token": 1e-6,
-                "output_cost_per_token": 4e-6,
-            }
-        ],
-    )
-
-    register_pricing_from_global_configs()
-
-    assert "openrouter/google/gemini-2.5-flash" in spy.all_keys
--- a/surfsense_backend/tests/unit/services/test_quality_score.py
+++ b/surfsense_backend/tests/unit/services/test_quality_score.py
@ -1,4 +1,4 @@
-"""Unit tests for the Auto (Fastest) quality scoring module."""
+"""Unit tests for the Auto quality scoring module."""

 from __future__ import annotations

--- a/surfsense_backend/tests/unit/services/test_vision_llm_api_base_defense.py
+++ b/surfsense_backend/tests/unit/services/test_vision_llm_api_base_defense.py
@ -1,77 +0,0 @@
-"""Vision LLM resolution must pass explicit per-config ``api_base``."""
-
-from __future__ import annotations
-
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-pytestmark = pytest.mark.unit
-
-
-@pytest.mark.asyncio
-async def test_get_vision_llm_global_openrouter_sets_api_base():
-    """Global negative-ID branch forwards the explicit OpenRouter base."""
-    from app.services import llm_service
-
-    cfg = {
-        "id": -30_001,
-        "name": "GPT-4o Vision (OpenRouter)",
-        "litellm_provider": "openrouter",
-        "model_name": "openai/gpt-4o",
-        "api_key": "sk-or-test",
-        "api_base": "https://openrouter.ai/api/v1",
-        "api_version": None,
-        "litellm_params": {},
-        "billing_tier": "free",
-    }
-
-    search_space = MagicMock()
-    search_space.id = 1
-    search_space.user_id = "user-x"
-    search_space.vision_llm_config_id = cfg["id"]
-
-    session = AsyncMock()
-    scalars = MagicMock()
-    scalars.first.return_value = search_space
-    result = MagicMock()
-    result.scalars.return_value = scalars
-    session.execute.return_value = result
-
-    captured: dict = {}
-
-    class FakeSanitized:
-        def __init__(self, **kwargs):
-            captured.update(kwargs)
-
-    with (
-        patch(
-            "app.services.vision_llm_router_service.get_global_vision_llm_config",
-            return_value=cfg,
-        ),
-        patch(
-            "app.agents.chat.runtime.llm_config.SanitizedChatLiteLLM",
-            new=FakeSanitized,
-        ),
-    ):
-        await llm_service.get_vision_llm(session=session, search_space_id=1)
-
-    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
-    assert captured["model"] == "openrouter/openai/gpt-4o"
-
-
-def test_vision_router_deployment_sets_api_base_when_config_empty():
-    """Auto-mode vision router carries explicit api_base into deployments."""
-    from app.services.vision_llm_router_service import VisionLLMRouterService
-
-    deployment = VisionLLMRouterService._config_to_deployment(
-        {
-            "model_name": "openai/gpt-4o",
-            "litellm_provider": "openrouter",
-            "api_key": "sk-or-test",
-            "api_base": "https://openrouter.ai/api/v1",
-        }
-    )
-    assert deployment is not None
-    assert deployment["litellm_params"]["api_base"] == "https://openrouter.ai/api/v1"
-    assert deployment["litellm_params"]["model"] == "openrouter/openai/gpt-4o"