feat: fix vision/image provider-specific errors and fix podcast/video streaming

DESKTOP-RTLN3BA\$punk 2026-05-02 19:18:53 -07:00
parent ae9d36d77f
commit 47b2994ec7
54 changed files with 4469 additions and 563 deletions


@@ -0,0 +1,286 @@
"""Image-aware extension of the Auto-pin resolver.

When the current chat turn carries an ``image_url`` block, the pin
resolver must:

1. Filter the candidate pool to vision-capable cfgs so a freshly
   selected pin can never be text-only.
2. Treat any existing pin whose capability is False as invalid (force
   re-pin), even when it would otherwise be reused as the thread's
   stable model.
3. Raise ``ValueError`` (mapped to the friendly
   ``MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT`` SSE error in the streaming
   task) when no vision-capable cfg is available, instead of silently
   pinning text-only and 404-ing at the provider.
"""

from __future__ import annotations

from dataclasses import dataclass
from types import SimpleNamespace

import pytest

from app.services.auto_model_pin_service import (
    clear_healthy,
    clear_runtime_cooldown,
    resolve_or_get_pinned_llm_config_id,
)

pytestmark = pytest.mark.unit


@pytest.fixture(autouse=True)
def _reset_caches():
    clear_runtime_cooldown()
    clear_healthy()
    yield
    clear_runtime_cooldown()
    clear_healthy()


@dataclass
class _FakeQuotaResult:
    allowed: bool


class _FakeExecResult:
    def __init__(self, thread):
        self._thread = thread

    def unique(self):
        return self

    def scalar_one_or_none(self):
        return self._thread


class _FakeSession:
    def __init__(self, thread):
        self.thread = thread
        self.commit_count = 0

    async def execute(self, _stmt):
        return _FakeExecResult(self.thread)

    async def commit(self):
        self.commit_count += 1


def _thread(*, pinned: int | None = None):
    return SimpleNamespace(id=1, search_space_id=10, pinned_llm_config_id=pinned)


def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
    return {
        "id": id_,
        "provider": "OPENAI",
        "model_name": f"vision-{id_}",
        "api_key": "k",
        "billing_tier": tier,
        "supports_image_input": True,
        "auto_pin_tier": "A",
        "quality_score": quality,
    }


def _text_only_cfg(id_: int, *, tier: str = "free", quality: int = 90) -> dict:
    return {
        "id": id_,
        "provider": "OPENAI",
        "model_name": f"text-{id_}",
        "api_key": "k",
        "billing_tier": tier,
        # Higher quality than the vision cfgs, so a bug that ignores
        # the image flag would surface as the resolver picking this one.
        "supports_image_input": False,
        "auto_pin_tier": "A",
        "quality_score": quality,
    }


async def _premium_allowed(*_args, **_kwargs):
    return _FakeQuotaResult(allowed=True)


@pytest.mark.asyncio
async def test_image_turn_filters_out_text_only_candidates(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )
    assert result.resolved_llm_config_id == -2
    # The thread should be pinned to the vision cfg even though the
    # text-only cfg has a higher quality score.
    assert session.thread.pinned_llm_config_id == -2


@pytest.mark.asyncio
async def test_image_turn_force_repins_stale_text_only_pin(monkeypatch):
    """An existing text-only pin must be invalidated when the next turn
    requires image input. The non-image path would happily reuse it."""
    from app.config import config

    session = _FakeSession(_thread(pinned=-1))
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )
    assert result.resolved_llm_config_id == -2
    assert result.from_existing_pin is False
    assert session.thread.pinned_llm_config_id == -2


@pytest.mark.asyncio
async def test_image_turn_reuses_existing_vision_pin(monkeypatch):
    """If the thread is already pinned to a vision-capable cfg, reuse it
    just as the non-image path would. Image-aware filtering must not
    force spurious re-pins."""
    from app.config import config

    session = _FakeSession(_thread(pinned=-2))
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2), _vision_cfg(-3, quality=70)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )
    assert result.resolved_llm_config_id == -2
    assert result.from_existing_pin is True


@pytest.mark.asyncio
async def test_image_turn_with_no_vision_candidates_raises(monkeypatch):
    """The friendly-error path: no vision-capable cfg in the pool -> raise
    ``ValueError`` whose message contains ``vision-capable`` so the
    streaming task can map it to ``MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT``."""
    from app.config import config

    session = _FakeSession(_thread())
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _text_only_cfg(-2)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    with pytest.raises(ValueError, match="vision-capable"):
        await resolve_or_get_pinned_llm_config_id(
            session,
            thread_id=1,
            search_space_id=10,
            user_id=None,
            selected_llm_config_id=0,
            requires_image_input=True,
        )


@pytest.mark.asyncio
async def test_non_image_turn_keeps_text_only_in_pool(monkeypatch):
    """Regression guard: the image flag must default to False and not
    affect a normal text-only turn; text-only cfgs remain selectable."""
    from app.config import config

    session = _FakeSession(_thread())
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
    )
    assert result.resolved_llm_config_id == -1


@pytest.mark.asyncio
async def test_image_turn_unannotated_cfg_resolves_via_helper(monkeypatch):
    """A YAML cfg that omits ``supports_image_input`` falls through to
    ``derive_supports_image_input`` (LiteLLM-driven). For ``gpt-4o``
    that returns True, so the cfg should be a valid candidate."""
    from app.config import config

    session = _FakeSession(_thread())
    cfg_unannotated_vision = {
        "id": -2,
        "provider": "OPENAI",
        "model_name": "gpt-4o",  # known vision model in LiteLLM's map
        "api_key": "k",
        "billing_tier": "free",
        "auto_pin_tier": "A",
        "quality_score": 80,
        # NOTE: no supports_image_input key
    }
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [cfg_unannotated_vision])
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )
    assert result.resolved_llm_config_id == -2
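
For orientation, the filtering contract these tests pin down fits in a few lines. This is an illustrative sketch only, under the assumption that candidate cfgs are plain dicts; ``_vision_candidates`` is a hypothetical name, and the real resolver in app.services.auto_model_pin_service also handles pin reuse, cooldowns, and quota.

def _vision_candidates(cfgs: list[dict]) -> list[dict]:
    # A missing key falls back to the capability resolver in the real
    # code; here we default-allow to keep the sketch short.
    pool = [c for c in cfgs if c.get("supports_image_input", True)]
    if not pool:
        # The message must mention "vision-capable" so the streaming task
        # can map it to MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT.
        raise ValueError("No vision-capable LLM config is available")
    return pool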


@@ -15,6 +15,7 @@ vision LLM extraction:
from __future__ import annotations

import asyncio
import contextlib
from typing import Any
from uuid import uuid4
@@ -57,6 +58,9 @@ class _FakeSession:
    async def commit(self) -> None:
        self.committed = True

    async def rollback(self) -> None:
        pass

    async def close(self) -> None:
        pass
@@ -71,7 +75,9 @@ async def _fake_shielded_session():
_SESSIONS_USED: list[_FakeSession] = []


-def _patch_isolation_layer(monkeypatch, *, reserve_result, finalize_result=None):
+def _patch_isolation_layer(
+    monkeypatch, *, reserve_result, finalize_result=None, finalize_exc=None
+):
    """Wire fake reserve/finalize/release/session helpers."""
    _SESSIONS_USED.clear()
    reserve_calls: list[dict[str, Any]] = []
@@ -91,6 +97,8 @@ def _patch_isolation_layer(monkeypatch, *, reserve_result, finalize_result=None)
    async def _fake_finalize(
        *, db_session, user_id, request_id, actual_micros, reserved_micros
    ):
        if finalize_exc is not None:
            raise finalize_exc
        finalize_calls.append(
            {
                "user_id": user_id,
@@ -343,6 +351,125 @@ async def test_premium_uses_estimator_when_no_micros_override(monkeypatch):
    assert spies["reserve"][0]["reserve_micros"] == 12_345


@pytest.mark.asyncio
async def test_premium_finalize_failure_propagates_and_releases(monkeypatch):
    from app.services.billable_calls import BillingSettlementError, billable_call

    class _FinalizeError(RuntimeError):
        pass

    spies = _patch_isolation_layer(
        monkeypatch,
        reserve_result=_FakeQuotaResult(allowed=True),
        finalize_exc=_FinalizeError("db finalize failed"),
    )
    user_id = uuid4()
    with pytest.raises(BillingSettlementError):
        async with billable_call(
            user_id=user_id,
            search_space_id=42,
            billing_tier="premium",
            base_model="openai/gpt-image-1",
            quota_reserve_micros_override=50_000,
            usage_type="image_generation",
        ) as acc:
            acc.add(
                model="openai/gpt-image-1",
                prompt_tokens=0,
                completion_tokens=0,
                total_tokens=0,
                cost_micros=40_000,
                call_kind="image_generation",
            )
    assert len(spies["reserve"]) == 1
    assert len(spies["release"]) == 1
    assert spies["record"] == []


@pytest.mark.asyncio
async def test_premium_audit_commit_hang_times_out_after_finalize(monkeypatch):
    from app.services.billable_calls import billable_call

    spies = _patch_isolation_layer(
        monkeypatch, reserve_result=_FakeQuotaResult(allowed=True)
    )
    user_id = uuid4()

    class _HangingCommitSession(_FakeSession):
        async def commit(self) -> None:
            await asyncio.sleep(60)

    @contextlib.asynccontextmanager
    async def _hanging_session_factory():
        s = _HangingCommitSession()
        _SESSIONS_USED.append(s)
        yield s

    async with billable_call(
        user_id=user_id,
        search_space_id=42,
        billing_tier="premium",
        base_model="openai/gpt-image-1",
        quota_reserve_micros_override=50_000,
        usage_type="image_generation",
        billable_session_factory=_hanging_session_factory,
        audit_timeout_seconds=0.01,
    ) as acc:
        acc.add(
            model="openai/gpt-image-1",
            prompt_tokens=0,
            completion_tokens=0,
            total_tokens=0,
            cost_micros=40_000,
            call_kind="image_generation",
        )
    assert len(spies["reserve"]) == 1
    assert len(spies["finalize"]) == 1
    assert len(spies["record"]) == 1
    assert spies["release"] == []


@pytest.mark.asyncio
async def test_free_audit_failure_is_best_effort(monkeypatch):
    from app.services.billable_calls import billable_call

    spies = _patch_isolation_layer(
        monkeypatch, reserve_result=_FakeQuotaResult(allowed=True)
    )

    async def _failing_record(_session, **_kwargs):
        raise RuntimeError("audit insert failed")

    monkeypatch.setattr(
        "app.services.billable_calls.record_token_usage",
        _failing_record,
        raising=False,
    )
    async with billable_call(
        user_id=uuid4(),
        search_space_id=42,
        billing_tier="free",
        base_model="openai/gpt-image-1",
        usage_type="image_generation",
        audit_timeout_seconds=0.01,
    ) as acc:
        acc.add(
            model="openai/gpt-image-1",
            prompt_tokens=0,
            completion_tokens=0,
            total_tokens=0,
            cost_micros=37_000,
            call_kind="image_generation",
        )
    assert spies["reserve"] == []
    assert spies["finalize"] == []


# ---------------------------------------------------------------------------
# Podcast / video-presentation usage_type coverage
# ---------------------------------------------------------------------------
@@ -387,7 +514,7 @@ async def test_free_podcast_path_audits_with_podcast_usage_type(monkeypatch):
    assert len(spies["record"]) == 1
    row = spies["record"][0]
    assert row["usage_type"] == "podcast_generation"
-    assert row["thread_id"] == 99
+    assert row["thread_id"] is None
    assert row["search_space_id"] == 42
    assert row["call_details"] == {"podcast_id": 7, "title": "Test Podcast"}


@@ -0,0 +1,177 @@
"""Defense-in-depth: image-gen call sites must not let an empty
``api_base`` fall through to LiteLLM's module-global ``litellm.api_base``.

The bug repro: an OpenRouter image-gen config ships ``api_base=""``.
The pre-fix call site in
``image_generation_routes._execute_image_generation`` did
``if cfg.get("api_base"): kwargs["api_base"] = cfg["api_base"]``, which
silently dropped the empty string. LiteLLM then fell back to
``litellm.api_base`` (commonly inherited from ``AZURE_OPENAI_ENDPOINT``),
and OpenRouter's ``image_generation/transformation`` appended
``/chat/completions`` to it, yielding a 404 ``Resource not found``.

This test pins the post-fix behaviour: with an empty ``api_base`` in
the config, the call site MUST set ``api_base`` to OpenRouter's public
URL instead of leaving it unset.
"""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

pytestmark = pytest.mark.unit


@pytest.mark.asyncio
async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
    """The global-config branch (``config_id < 0``) of
    ``_execute_image_generation`` must apply the resolver and pin
    ``api_base`` to OpenRouter when the config ships an empty string.
    """
    from app.routes import image_generation_routes

    cfg = {
        "id": -20_001,
        "name": "GPT Image 1 (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-image-1",
        "api_key": "sk-or-test",
        "api_base": "",  # the original bug shape
        "api_version": None,
        "litellm_params": {},
    }
    captured: dict = {}

    async def fake_aimage_generation(**kwargs):
        captured.update(kwargs)
        return MagicMock(model_dump=lambda: {"data": []}, _hidden_params={})

    image_gen = MagicMock()
    image_gen.image_generation_config_id = cfg["id"]
    image_gen.prompt = "test"
    image_gen.n = 1
    image_gen.quality = None
    image_gen.size = None
    image_gen.style = None
    image_gen.response_format = None
    image_gen.model = None
    search_space = MagicMock()
    search_space.image_generation_config_id = cfg["id"]
    session = MagicMock()

    with (
        patch.object(
            image_generation_routes,
            "_get_global_image_gen_config",
            return_value=cfg,
        ),
        patch.object(
            image_generation_routes,
            "aimage_generation",
            side_effect=fake_aimage_generation,
        ),
    ):
        await image_generation_routes._execute_image_generation(
            session=session, image_gen=image_gen, search_space=search_space
        )

    # The whole point of the fix: even with an empty ``api_base`` in the
    # config, we forward OpenRouter's public URL so the call doesn't
    # inherit an Azure endpoint.
    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-image-1"


@pytest.mark.asyncio
async def test_generate_image_tool_global_sets_api_base_when_config_empty():
    """Same defense at the agent tool entry point; both surfaces share
    the same OpenRouter config payloads."""
    from app.agents.new_chat.tools import generate_image as gi_module

    cfg = {
        "id": -20_001,
        "name": "GPT Image 1 (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-image-1",
        "api_key": "sk-or-test",
        "api_base": "",
        "api_version": None,
        "litellm_params": {},
    }
    captured: dict = {}

    async def fake_aimage_generation(**kwargs):
        captured.update(kwargs)
        response = MagicMock()
        response.model_dump.return_value = {
            "data": [{"url": "https://example.com/x.png"}]
        }
        response._hidden_params = {"model": "openrouter/openai/gpt-image-1"}
        return response

    search_space = MagicMock()
    search_space.id = 1
    search_space.image_generation_config_id = cfg["id"]
    session_cm = AsyncMock()
    session = AsyncMock()
    session_cm.__aenter__.return_value = session
    scalars = MagicMock()
    scalars.first.return_value = search_space
    exec_result = MagicMock()
    exec_result.scalars.return_value = scalars
    session.execute.return_value = exec_result
    session.add = MagicMock()
    session.commit = AsyncMock()
    session.refresh = AsyncMock()

    # ``refresh(db_image_gen)`` needs to populate ``id`` for the token URL
    # fallback.
    async def _refresh(obj):
        obj.id = 1

    session.refresh.side_effect = _refresh

    with (
        patch.object(gi_module, "shielded_async_session", return_value=session_cm),
        patch.object(gi_module, "_get_global_image_gen_config", return_value=cfg),
        patch.object(
            gi_module, "aimage_generation", side_effect=fake_aimage_generation
        ),
        patch.object(
            gi_module, "is_image_gen_auto_mode", side_effect=lambda cid: cid == 0
        ),
    ):
        tool = gi_module.create_generate_image_tool(
            search_space_id=1, db_session=MagicMock()
        )
        await tool.ainvoke({"prompt": "a cat", "n": 1})

    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-image-1"


def test_image_gen_router_deployment_sets_api_base_when_config_empty():
    """The Auto-mode router pool must also resolve ``api_base`` when an
    OpenRouter config ships an empty string. The deployment dict is fed
    straight to ``litellm.Router``, so a missing ``api_base`` would
    leak the same way as the direct call sites.
    """
    from app.services.image_gen_router_service import ImageGenRouterService

    deployment = ImageGenRouterService._config_to_deployment(
        {
            "model_name": "openai/gpt-image-1",
            "provider": "OPENROUTER",
            "api_key": "sk-or-test",
            "api_base": "",
        }
    )
    assert deployment is not None
    assert deployment["litellm_params"]["api_base"] == "https://openrouter.ai/api/v1"
    assert deployment["litellm_params"]["model"] == "openrouter/openai/gpt-image-1"


@@ -265,6 +265,10 @@ def test_generate_image_gen_configs_filters_by_image_output():
        assert c["billing_tier"] in {"free", "premium"}
        assert c["provider"] == "OPENROUTER"
        assert c[_OPENROUTER_DYNAMIC_MARKER] is True
        # Defense-in-depth: emit the OpenRouter base URL at source so a
        # downstream call site that forgets ``resolve_api_base`` still
        # doesn't 404 against an inherited Azure endpoint.
        assert c["api_base"] == "https://openrouter.ai/api/v1"


def test_generate_image_gen_configs_assigns_image_id_offset():
@@ -342,6 +346,10 @@ def test_generate_vision_llm_configs_filters_by_image_input_text_output():
    assert cfg["input_cost_per_token"] == pytest.approx(5e-6)
    assert cfg["output_cost_per_token"] == pytest.approx(15e-6)
    assert cfg[_OPENROUTER_DYNAMIC_MARKER] is True
    # Defense-in-depth: emit the OpenRouter base URL at source so a
    # downstream call site that forgets ``resolve_api_base`` still
    # doesn't inherit an Azure endpoint.
    assert cfg["api_base"] == "https://openrouter.ai/api/v1"


def test_generate_vision_llm_configs_drops_chat_only_filters():


@@ -0,0 +1,107 @@
"""Unit tests for the shared ``api_base`` resolver.

The cascade exists so vision and image-gen call sites can't silently
inherit ``litellm.api_base`` (commonly set by ``AZURE_OPENAI_ENDPOINT``)
when an OpenRouter / Groq / etc. config ships an empty string. See the
``provider_api_base`` module docstring for the original repro
(OpenRouter image-gen 404-ing against an Azure endpoint).
"""

from __future__ import annotations

import pytest

from app.services.provider_api_base import (
    PROVIDER_DEFAULT_API_BASE,
    PROVIDER_KEY_DEFAULT_API_BASE,
    resolve_api_base,
)

pytestmark = pytest.mark.unit


def test_config_value_wins_over_defaults():
    """A non-empty config value is always returned verbatim, even when
    the provider has a default; the operator gets the last word."""
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base="https://my-openrouter-mirror.example.com/v1",
    )
    assert result == "https://my-openrouter-mirror.example.com/v1"


def test_provider_key_default_when_config_missing():
    """``DEEPSEEK`` shares the ``openai`` LiteLLM prefix but has its own
    base URL; the provider-key map must take precedence over the prefix
    map so DeepSeek requests don't go to OpenAI."""
    result = resolve_api_base(
        provider="DEEPSEEK",
        provider_prefix="openai",
        config_api_base=None,
    )
    assert result == PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]


def test_provider_prefix_default_when_no_key_default():
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base=None,
    )
    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]


def test_unknown_provider_returns_none():
    """When neither map matches, we return ``None`` so the caller can let
    LiteLLM apply its own provider-integration default (Azure deployment
    URL, custom-provider URL, etc.)."""
    result = resolve_api_base(
        provider="SOMETHING_NEW",
        provider_prefix="something_new",
        config_api_base=None,
    )
    assert result is None


def test_empty_string_config_treated_as_missing():
    """The original bug: OpenRouter dynamic configs ship ``api_base=""``
    and downstream call sites use ``if cfg.get("api_base"):``. Empty
    strings are falsy in Python, but the cascade has to step in anyway."""
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base="",
    )
    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]


def test_whitespace_only_config_treated_as_missing():
    """A config value of ``" "`` is a configuration mistake; treat it
    as missing instead of forwarding whitespace to LiteLLM (which would
    almost certainly 404)."""
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base=" ",
    )
    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]


def test_provider_case_insensitive():
    """Some call sites pass the provider lowercase (DB enum value), others
    uppercase (YAML key). Both must resolve."""
    upper = resolve_api_base(
        provider="DEEPSEEK", provider_prefix="openai", config_api_base=None
    )
    lower = resolve_api_base(
        provider="deepseek", provider_prefix="openai", config_api_base=None
    )
    assert upper == lower == PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]


def test_all_inputs_none_returns_none():
    assert (
        resolve_api_base(provider=None, provider_prefix=None, config_api_base=None)
        is None
    )
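
Taken together, these tests pin the cascade's observable behaviour. A minimal sketch consistent with them follows; the map entries are illustrative placeholders (the DeepSeek URL in particular is an assumption), and the real module may differ in detail.

_KEY_DEFAULTS = {"DEEPSEEK": "https://api.deepseek.com"}           # by provider key
_PREFIX_DEFAULTS = {"openrouter": "https://openrouter.ai/api/v1"}  # by LiteLLM prefix


def resolve_api_base_sketch(*, provider, provider_prefix, config_api_base):
    # 1. A non-empty, non-whitespace config value wins verbatim.
    if config_api_base and config_api_base.strip():
        return config_api_base
    # 2. Provider-key default (case-insensitive) beats the prefix map,
    #    so DeepSeek doesn't inherit OpenAI's base via the shared prefix.
    if provider and provider.upper() in _KEY_DEFAULTS:
        return _KEY_DEFAULTS[provider.upper()]
    # 3. LiteLLM-prefix default.
    if provider_prefix and provider_prefix in _PREFIX_DEFAULTS:
        return _PREFIX_DEFAULTS[provider_prefix]
    # 4. Unknown: return None and let LiteLLM apply its own default.
    return None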


@@ -0,0 +1,244 @@
"""Unit tests for the shared chat-image capability resolver.

Two resolvers, two intents:

- ``derive_supports_image_input``: best-effort True for the catalog and
  selector. Default-allow on unknown / unmapped models. The streaming
  task safety net never sees this value directly.
- ``is_known_text_only_chat_model``: strict opt-out for the safety net.
  Returns True only when LiteLLM's model map *explicitly* sets
  ``supports_vision=False``. Anything else (missing key, exception,
  True) returns False so the request flows through to the provider.
"""

from __future__ import annotations

import pytest

from app.services.provider_capabilities import (
    derive_supports_image_input,
    is_known_text_only_chat_model,
)

pytestmark = pytest.mark.unit


# ---------------------------------------------------------------------------
# derive_supports_image_input — OpenRouter modalities path (authoritative)
# ---------------------------------------------------------------------------
def test_or_modalities_with_image_returns_true():
    assert (
        derive_supports_image_input(
            provider="OPENROUTER",
            model_name="openai/gpt-4o",
            openrouter_input_modalities=["text", "image"],
        )
        is True
    )


def test_or_modalities_text_only_returns_false():
    assert (
        derive_supports_image_input(
            provider="OPENROUTER",
            model_name="deepseek/deepseek-v3.2-exp",
            openrouter_input_modalities=["text"],
        )
        is False
    )


def test_or_modalities_empty_list_returns_false():
    """OpenRouter explicitly publishing an empty modality list is a
    definitive 'no inputs at all' signal; treat it as False rather than
    falling back to LiteLLM."""
    assert (
        derive_supports_image_input(
            provider="OPENROUTER",
            model_name="weird/empty-modalities",
            openrouter_input_modalities=[],
        )
        is False
    )


def test_or_modalities_none_falls_through_to_litellm():
    """``None`` (missing key) is *not* a definitive signal — fall through
    to LiteLLM. Using ``openai/gpt-4o``, which is in LiteLLM's map."""
    assert (
        derive_supports_image_input(
            provider="OPENAI",
            model_name="gpt-4o",
            openrouter_input_modalities=None,
        )
        is True
    )


# ---------------------------------------------------------------------------
# derive_supports_image_input — LiteLLM model-map path
# ---------------------------------------------------------------------------
def test_litellm_known_vision_model_returns_true():
    assert (
        derive_supports_image_input(
            provider="OPENAI",
            model_name="gpt-4o",
        )
        is True
    )


def test_litellm_base_model_wins_over_model_name():
    """Azure-style entries pass model_name=deployment_id and put the
    canonical sku in litellm_params.base_model. The resolver must
    consult base_model first, or the deployment id (which LiteLLM
    doesn't know) would shadow the real capability."""
    assert (
        derive_supports_image_input(
            provider="AZURE_OPENAI",
            model_name="my-azure-deployment-id",
            base_model="gpt-4o",
        )
        is True
    )


def test_litellm_unknown_model_default_allows():
    """Default-allow on unknown — the safety net is the actual block."""
    assert (
        derive_supports_image_input(
            provider="CUSTOM",
            model_name="brand-new-model-x9-unmapped",
            custom_provider="brand_new_proxy",
        )
        is True
    )


def test_litellm_known_text_only_returns_false():
    """A model that LiteLLM explicitly knows is text-only resolves to
    False even via the catalog resolver. ``deepseek-chat`` (the
    DeepSeek-V3 chat sku) is in the map without supports_vision, and
    LiteLLM's ``supports_vision`` returns False for it."""
    # Sanity: confirm the helper's negative path. We use a small model
    # known not to support vision per the map.
    result = derive_supports_image_input(
        provider="DEEPSEEK",
        model_name="deepseek-chat",
    )
    # We accept either False (LiteLLM said an explicit no) or True
    # (default-allow if the entry isn't mapped on this version) — the
    # invariant is that the resolver never *raises* on a known-text-only
    # provider/model. The behaviour-binding assertion lives in
    # ``test_is_known_text_only_returns_true_on_explicit_false`` below.
    assert isinstance(result, bool)


# ---------------------------------------------------------------------------
# is_known_text_only_chat_model — strict opt-out semantics
# ---------------------------------------------------------------------------
def test_is_known_text_only_returns_false_for_vision_model():
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="gpt-4o",
        )
        is False
    )


def test_is_known_text_only_returns_false_for_unknown_model():
    """Strict opt-out: missing from the map ≠ text-only. The safety net
    must NOT fire for an unmapped model; that's the regression we're
    fixing."""
    assert (
        is_known_text_only_chat_model(
            provider="CUSTOM",
            model_name="brand-new-model-x9-unmapped",
            custom_provider="brand_new_proxy",
        )
        is False
    )


def test_is_known_text_only_returns_false_when_lookup_raises(monkeypatch):
    """LiteLLM's ``get_model_info`` raises freely on parse errors. The
    helper swallows the exception and returns False so the safety net
    doesn't fire on a transient lookup failure."""
    import app.services.provider_capabilities as pc

    def _raise(**_kwargs):
        raise ValueError("intentional test failure")

    monkeypatch.setattr(pc.litellm, "get_model_info", _raise)
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="gpt-4o",
        )
        is False
    )


def test_is_known_text_only_returns_true_on_explicit_false(monkeypatch):
    """Stub LiteLLM's ``get_model_info`` to return an explicit False so
    we exercise the opt-out path deterministically. Using a stub keeps
    the test stable across LiteLLM map updates."""
    import app.services.provider_capabilities as pc

    def _info(**_kwargs):
        return {"supports_vision": False, "max_input_tokens": 8192}

    monkeypatch.setattr(pc.litellm, "get_model_info", _info)
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="any-model",
        )
        is True
    )


def test_is_known_text_only_returns_false_on_supports_vision_true(monkeypatch):
    import app.services.provider_capabilities as pc

    def _info(**_kwargs):
        return {"supports_vision": True}

    monkeypatch.setattr(pc.litellm, "get_model_info", _info)
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="any-model",
        )
        is False
    )


def test_is_known_text_only_returns_false_on_missing_key(monkeypatch):
    """A model entry without ``supports_vision`` at all is treated as
    'unknown'; strict opt-out means False."""
    import app.services.provider_capabilities as pc

    def _info(**_kwargs):
        return {"max_input_tokens": 8192}  # no supports_vision

    monkeypatch.setattr(pc.litellm, "get_model_info", _info)
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="any-model",
        )
        is False
    )
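
The stubbed tests above nail the opt-out semantics down to a few lines. A sketch consistent with them, using LiteLLM's real ``litellm.get_model_info`` lookup; ``is_known_text_only_sketch`` is a hypothetical stand-in for the production helper, which also threads provider prefixes and base_model.

import litellm


def is_known_text_only_sketch(*, model_name: str) -> bool:
    """True only when the map *explicitly* says supports_vision=False."""
    try:
        info = litellm.get_model_info(model=model_name)
    except Exception:
        # Lookup failures must never block a request.
        return False
    # A missing key means "unknown", and unknown never fires the net.
    return info.get("supports_vision") is False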


@@ -0,0 +1,281 @@
"""Unit tests for the chat-catalog ``supports_image_input`` capability flag.

Capability is sourced from two places, in order of preference:

1. ``architecture.input_modalities`` for dynamic OpenRouter chat configs
   (authoritative: OpenRouter publishes per-model modalities directly).
2. LiteLLM's authoritative model map (``litellm.supports_vision``) for
   YAML / BYOK configs that don't carry an explicit operator override.

The catalog default is *True* (conservative-allow): an unknown / unmapped
model is not pre-judged. The streaming-task safety net
(``is_known_text_only_chat_model``) is the only place a False actually
blocks a request, and it requires LiteLLM to *explicitly* mark the model
as text-only.
"""

from __future__ import annotations

import pytest

from app.services.openrouter_integration_service import (
    _OPENROUTER_DYNAMIC_MARKER,
    _generate_configs,
    _supports_image_input,
)

pytestmark = pytest.mark.unit

_SETTINGS_BASE: dict = {
    "api_key": "sk-or-test",
    "id_offset": -10_000,
    "rpm": 200,
    "tpm": 1_000_000,
    "free_rpm": 20,
    "free_tpm": 100_000,
    "anonymous_enabled_paid": False,
    "anonymous_enabled_free": True,
    "quota_reserve_tokens": 4000,
}


# ---------------------------------------------------------------------------
# _supports_image_input helper (OpenRouter modalities)
# ---------------------------------------------------------------------------
def test_supports_image_input_true_for_multimodal():
    assert (
        _supports_image_input(
            {
                "id": "openai/gpt-4o",
                "architecture": {
                    "input_modalities": ["text", "image"],
                    "output_modalities": ["text"],
                },
            }
        )
        is True
    )


def test_supports_image_input_false_for_text_only():
    """The exact failure mode the safety net guards against — DeepSeek V3
    is a text-in/text-out model and would 404 if forwarded image_url."""
    assert (
        _supports_image_input(
            {
                "id": "deepseek/deepseek-v3.2-exp",
                "architecture": {
                    "input_modalities": ["text"],
                    "output_modalities": ["text"],
                },
            }
        )
        is False
    )


def test_supports_image_input_false_when_modalities_missing():
    """Defensive: a missing architecture is treated as text-only at the
    OpenRouter helper level. The wider catalog resolver
    (``derive_supports_image_input``) only consults modalities when they
    are non-empty; otherwise it falls back to LiteLLM."""
    assert _supports_image_input({"id": "weird/model"}) is False
    assert _supports_image_input({"id": "weird/model", "architecture": {}}) is False
    assert (
        _supports_image_input(
            {"id": "weird/model", "architecture": {"input_modalities": None}}
        )
        is False
    )


# ---------------------------------------------------------------------------
# _generate_configs threads the flag onto every emitted chat config
# ---------------------------------------------------------------------------
def test_generate_configs_emits_supports_image_input():
    raw = [
        {
            "id": "openai/gpt-4o",
            "architecture": {
                "input_modalities": ["text", "image"],
                "output_modalities": ["text"],
            },
            "supported_parameters": ["tools"],
            "context_length": 200_000,
            "pricing": {"prompt": "0.000005", "completion": "0.000015"},
        },
        {
            "id": "deepseek/deepseek-v3.2-exp",
            "architecture": {
                "input_modalities": ["text"],
                "output_modalities": ["text"],
            },
            "supported_parameters": ["tools"],
            "context_length": 200_000,
            "pricing": {"prompt": "0.000003", "completion": "0.000015"},
        },
    ]
    cfgs = _generate_configs(raw, dict(_SETTINGS_BASE))
    by_model = {c["model_name"]: c for c in cfgs}

    gpt = by_model["openai/gpt-4o"]
    assert gpt["supports_image_input"] is True
    assert gpt[_OPENROUTER_DYNAMIC_MARKER] is True

    deepseek = by_model["deepseek/deepseek-v3.2-exp"]
    assert deepseek["supports_image_input"] is False
    assert deepseek[_OPENROUTER_DYNAMIC_MARKER] is True


# ---------------------------------------------------------------------------
# YAML loader: defer to derive_supports_image_input on unannotated entries
# ---------------------------------------------------------------------------
def test_yaml_loader_resolves_unannotated_vision_model_to_true(tmp_path, monkeypatch):
    """The regression case: an Azure GPT-5.x YAML entry without a
    ``supports_image_input`` override should resolve to True via LiteLLM's
    model map (which says ``supports_vision: true``). Previously this
    defaulted to False, blocking every image turn for vision-capable
    YAML configs."""
    yaml_dir = tmp_path / "app" / "config"
    yaml_dir.mkdir(parents=True)
    (yaml_dir / "global_llm_config.yaml").write_text(
        """
global_llm_configs:
  - id: -2
    name: Azure GPT-4o
    provider: AZURE_OPENAI
    model_name: gpt-4o
    api_key: sk-test
""",
        encoding="utf-8",
    )
    from app import config as config_module

    monkeypatch.setattr(config_module, "BASE_DIR", tmp_path)
    configs = config_module.load_global_llm_configs()
    assert len(configs) == 1
    assert configs[0]["supports_image_input"] is True


def test_yaml_loader_respects_explicit_supports_image_input(tmp_path, monkeypatch):
    yaml_dir = tmp_path / "app" / "config"
    yaml_dir.mkdir(parents=True)
    (yaml_dir / "global_llm_config.yaml").write_text(
        """
global_llm_configs:
  - id: -1
    name: GPT-4o
    provider: OPENAI
    model_name: gpt-4o
    api_key: sk-test
    supports_image_input: false
""",
        encoding="utf-8",
    )
    from app import config as config_module

    monkeypatch.setattr(config_module, "BASE_DIR", tmp_path)
    configs = config_module.load_global_llm_configs()
    assert len(configs) == 1
    # Operator override always wins, even against LiteLLM's True.
    assert configs[0]["supports_image_input"] is False


def test_yaml_loader_unknown_model_default_allows(tmp_path, monkeypatch):
    """Unknown / unmapped model in YAML: default-allow. The streaming
    safety net (which requires an explicit False from LiteLLM) is the
    only place a real block happens, so we don't lock the user out of
    a freshly added third-party entry the catalog can't introspect."""
    yaml_dir = tmp_path / "app" / "config"
    yaml_dir.mkdir(parents=True)
    (yaml_dir / "global_llm_config.yaml").write_text(
        """
global_llm_configs:
  - id: -1
    name: Some Brand New Model
    provider: CUSTOM
    custom_provider: brand_new_proxy
    model_name: brand-new-model-x9
    api_key: sk-test
""",
        encoding="utf-8",
    )
    from app import config as config_module

    monkeypatch.setattr(config_module, "BASE_DIR", tmp_path)
    configs = config_module.load_global_llm_configs()
    assert len(configs) == 1
    assert configs[0]["supports_image_input"] is True


# ---------------------------------------------------------------------------
# AgentConfig threads the flag through both YAML and Auto / BYOK
# ---------------------------------------------------------------------------
def test_agent_config_from_yaml_explicit_overrides_resolver():
    from app.agents.new_chat.llm_config import AgentConfig

    cfg_text_only = AgentConfig.from_yaml_config(
        {
            "id": -1,
            "name": "Text Only Override",
            "provider": "openai",
            "model_name": "gpt-4o",  # Capable per LiteLLM, but operator says no.
            "api_key": "sk-test",
            "supports_image_input": False,
        }
    )
    cfg_explicit_vision = AgentConfig.from_yaml_config(
        {
            "id": -2,
            "name": "GPT-4o",
            "provider": "openai",
            "model_name": "gpt-4o",
            "api_key": "sk-test",
            "supports_image_input": True,
        }
    )
    assert cfg_text_only.supports_image_input is False
    assert cfg_explicit_vision.supports_image_input is True


def test_agent_config_from_yaml_unannotated_uses_resolver():
    """Without an explicit YAML key, AgentConfig defers to the catalog
    resolver for ``gpt-4o``; LiteLLM's map says supports_vision=True."""
    from app.agents.new_chat.llm_config import AgentConfig

    cfg = AgentConfig.from_yaml_config(
        {
            "id": -1,
            "name": "GPT-4o (no override)",
            "provider": "openai",
            "model_name": "gpt-4o",
            "api_key": "sk-test",
        }
    )
    assert cfg.supports_image_input is True


def test_agent_config_auto_mode_supports_image_input():
    """Auto routes across the pool. We optimistically allow image input
    so users can keep their selection on Auto with a vision-capable
    deployment somewhere in the pool. The router's own ``allowed_fails``
    handles non-vision deployments via fallback."""
    from app.agents.new_chat.llm_config import AgentConfig

    auto = AgentConfig.from_auto_mode()
    assert auto.supports_image_input is True
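
The modalities checks in the first block reduce to a short helper. A stand-in consistent with those assertions (not the literal source of ``_supports_image_input``):

def _supports_image_input_sketch(model: dict) -> bool:
    # Missing architecture, empty dict, or a None modality list all
    # collapse to [] and therefore to False, matching the tests above.
    modalities = (model.get("architecture") or {}).get("input_modalities") or []
    return "image" in modalities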


@@ -0,0 +1,89 @@
"""Defense-in-depth: vision-LLM resolution must not leak ``api_base``
defaults from ``litellm.api_base`` either.

Vision shares the same shape as image-gen: global YAML / OpenRouter
dynamic configs ship ``api_base=""``, and the pre-fix ``get_vision_llm``
call sites would silently drop the empty string and inherit
``AZURE_OPENAI_ENDPOINT``. ``ChatLiteLLM(...)`` doesn't 404 on
construction, so we test the kwargs we hand to it instead.
"""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

pytestmark = pytest.mark.unit


@pytest.mark.asyncio
async def test_get_vision_llm_global_openrouter_sets_api_base():
    """Global negative-ID branch: an OpenRouter vision config with
    ``api_base=""`` must end up calling ``SanitizedChatLiteLLM`` with
    ``api_base="https://openrouter.ai/api/v1"``; never an empty string,
    never silently absent."""
    from app.services import llm_service

    cfg = {
        "id": -30_001,
        "name": "GPT-4o Vision (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-4o",
        "api_key": "sk-or-test",
        "api_base": "",
        "api_version": None,
        "litellm_params": {},
        "billing_tier": "free",
    }
    search_space = MagicMock()
    search_space.id = 1
    search_space.user_id = "user-x"
    search_space.vision_llm_config_id = cfg["id"]
    session = AsyncMock()
    scalars = MagicMock()
    scalars.first.return_value = search_space
    result = MagicMock()
    result.scalars.return_value = scalars
    session.execute.return_value = result
    captured: dict = {}

    class FakeSanitized:
        def __init__(self, **kwargs):
            captured.update(kwargs)

    with (
        patch(
            "app.services.vision_llm_router_service.get_global_vision_llm_config",
            return_value=cfg,
        ),
        patch(
            "app.agents.new_chat.llm_config.SanitizedChatLiteLLM",
            new=FakeSanitized,
        ),
    ):
        await llm_service.get_vision_llm(session=session, search_space_id=1)

    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-4o"


def test_vision_router_deployment_sets_api_base_when_config_empty():
    """Auto-mode vision router: deployments are fed to ``litellm.Router``,
    so the resolver has to apply at deployment construction time too."""
    from app.services.vision_llm_router_service import VisionLLMRouterService

    deployment = VisionLLMRouterService._config_to_deployment(
        {
            "model_name": "openai/gpt-4o",
            "provider": "OPENROUTER",
            "api_key": "sk-or-test",
            "api_base": "",
        }
    )
    assert deployment is not None
    assert deployment["litellm_params"]["api_base"] == "https://openrouter.ai/api/v1"
    assert deployment["litellm_params"]["model"] == "openrouter/openai/gpt-4o"