feat: fix vision/image provider-specific errors and fix podcast/video streaming

DESKTOP-RTLN3BA\$punk 2026-05-02 19:18:53 -07:00
parent ae9d36d77f
commit 47b2994ec7
54 changed files with 4469 additions and 563 deletions


@@ -0,0 +1,286 @@
"""Image-aware extension of the Auto-pin resolver.

When the current chat turn carries an ``image_url`` block, the pin
resolver must:

1. Filter the candidate pool to vision-capable cfgs so a freshly
   selected pin can never be text-only.
2. Treat any existing pin whose capability is False as invalid (force
   re-pin), even when it would otherwise be reused as the thread's
   stable model.
3. Raise ``ValueError`` (mapped to the friendly
   ``MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT`` SSE error in the streaming
   task) when no vision-capable cfg is available, instead of silently
   pinning text-only and 404-ing at the provider.
"""

from __future__ import annotations

from dataclasses import dataclass
from types import SimpleNamespace

import pytest

from app.services.auto_model_pin_service import (
    clear_healthy,
    clear_runtime_cooldown,
    resolve_or_get_pinned_llm_config_id,
)

pytestmark = pytest.mark.unit


@pytest.fixture(autouse=True)
def _reset_caches():
    clear_runtime_cooldown()
    clear_healthy()
    yield
    clear_runtime_cooldown()
    clear_healthy()


@dataclass
class _FakeQuotaResult:
    allowed: bool


class _FakeExecResult:
    def __init__(self, thread):
        self._thread = thread

    def unique(self):
        return self

    def scalar_one_or_none(self):
        return self._thread


class _FakeSession:
    def __init__(self, thread):
        self.thread = thread
        self.commit_count = 0

    async def execute(self, _stmt):
        return _FakeExecResult(self.thread)

    async def commit(self):
        self.commit_count += 1


def _thread(*, pinned: int | None = None):
    return SimpleNamespace(id=1, search_space_id=10, pinned_llm_config_id=pinned)


def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
    return {
        "id": id_,
        "provider": "OPENAI",
        "model_name": f"vision-{id_}",
        "api_key": "k",
        "billing_tier": tier,
        "supports_image_input": True,
        "auto_pin_tier": "A",
        "quality_score": quality,
    }


def _text_only_cfg(id_: int, *, tier: str = "free", quality: int = 90) -> dict:
    return {
        "id": id_,
        "provider": "OPENAI",
        "model_name": f"text-{id_}",
        "api_key": "k",
        "billing_tier": tier,
        # Higher quality than the vision cfgs, so a bug that ignores
        # the image flag would surface as the resolver picking this one.
        "supports_image_input": False,
        "auto_pin_tier": "A",
        "quality_score": quality,
    }


async def _premium_allowed(*_args, **_kwargs):
    return _FakeQuotaResult(allowed=True)


@pytest.mark.asyncio
async def test_image_turn_filters_out_text_only_candidates(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )
    assert result.resolved_llm_config_id == -2
    # The thread should be pinned to the vision cfg even though the
    # text-only cfg has a higher quality score.
    assert session.thread.pinned_llm_config_id == -2


@pytest.mark.asyncio
async def test_image_turn_force_repins_stale_text_only_pin(monkeypatch):
    """An existing text-only pin must be invalidated when the next turn
    requires image input. The non-image path would happily reuse it."""
    from app.config import config

    session = _FakeSession(_thread(pinned=-1))
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )
    assert result.resolved_llm_config_id == -2
    assert result.from_existing_pin is False
    assert session.thread.pinned_llm_config_id == -2


@pytest.mark.asyncio
async def test_image_turn_reuses_existing_vision_pin(monkeypatch):
    """If the thread is already pinned to a vision-capable cfg, reuse it
    just as the non-image path would. Image-aware filtering must not
    force spurious re-pins."""
    from app.config import config

    session = _FakeSession(_thread(pinned=-2))
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2), _vision_cfg(-3, quality=70)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )
    assert result.resolved_llm_config_id == -2
    assert result.from_existing_pin is True


@pytest.mark.asyncio
async def test_image_turn_with_no_vision_candidates_raises(monkeypatch):
    """The friendly-error path: no vision-capable cfg in the pool -> raise
    ``ValueError`` whose message contains ``vision-capable`` so the
    streaming task can map it to ``MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT``."""
    from app.config import config

    session = _FakeSession(_thread())
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _text_only_cfg(-2)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    with pytest.raises(ValueError, match="vision-capable"):
        await resolve_or_get_pinned_llm_config_id(
            session,
            thread_id=1,
            search_space_id=10,
            user_id=None,
            selected_llm_config_id=0,
            requires_image_input=True,
        )


@pytest.mark.asyncio
async def test_non_image_turn_keeps_text_only_in_pool(monkeypatch):
    """Regression guard: the image flag must default to False and not
    affect a normal text-only turn; text-only cfgs remain selectable."""
    from app.config import config

    session = _FakeSession(_thread())
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
    )
    assert result.resolved_llm_config_id == -1


@pytest.mark.asyncio
async def test_image_turn_unannotated_cfg_resolves_via_helper(monkeypatch):
    """A YAML cfg that omits ``supports_image_input`` falls through to
    ``derive_supports_image_input`` (LiteLLM-driven). For ``gpt-4o``
    that returns True, so the cfg should be a valid candidate."""
    from app.config import config

    session = _FakeSession(_thread())
    cfg_unannotated_vision = {
        "id": -2,
        "provider": "OPENAI",
        "model_name": "gpt-4o",  # known vision model in LiteLLM's map
        "api_key": "k",
        "billing_tier": "free",
        "auto_pin_tier": "A",
        "quality_score": 80,
        # NOTE: no supports_image_input key
    }
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [cfg_unannotated_vision])
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )
    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )
    assert result.resolved_llm_config_id == -2
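
For orientation, the filtering contract these tests pin down fits in a few lines. This is an illustrative sketch only, under the assumption that candidate cfgs are plain dicts; ``_vision_candidates`` is a hypothetical name, and the real resolver in app.services.auto_model_pin_service also handles pin reuse, cooldowns, and quota.

def _vision_candidates(cfgs: list[dict]) -> list[dict]:
    # A missing key falls back to the capability resolver in the real
    # code; here we default-allow to keep the sketch short.
    pool = [c for c in cfgs if c.get("supports_image_input", True)]
    if not pool:
        # The message must mention "vision-capable" so the streaming task
        # can map it to MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT.
        raise ValueError("No vision-capable LLM config is available")
    return pool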


@@ -15,6 +15,7 @@ vision LLM extraction:
from __future__ import annotations

import asyncio
import contextlib
from typing import Any
from uuid import uuid4
@@ -57,6 +58,9 @@ class _FakeSession:
    async def commit(self) -> None:
        self.committed = True

    async def rollback(self) -> None:
        pass

    async def close(self) -> None:
        pass
@@ -71,7 +75,9 @@ async def _fake_shielded_session():
_SESSIONS_USED: list[_FakeSession] = []


-def _patch_isolation_layer(monkeypatch, *, reserve_result, finalize_result=None):
+def _patch_isolation_layer(
+    monkeypatch, *, reserve_result, finalize_result=None, finalize_exc=None
+):
    """Wire fake reserve/finalize/release/session helpers."""
    _SESSIONS_USED.clear()
    reserve_calls: list[dict[str, Any]] = []
@@ -91,6 +97,8 @@ def _patch_isolation_layer(monkeypatch, *, reserve_result, finalize_result=None)
    async def _fake_finalize(
        *, db_session, user_id, request_id, actual_micros, reserved_micros
    ):
        if finalize_exc is not None:
            raise finalize_exc
        finalize_calls.append(
            {
                "user_id": user_id,
@@ -343,6 +351,125 @@ async def test_premium_uses_estimator_when_no_micros_override(monkeypatch):
    assert spies["reserve"][0]["reserve_micros"] == 12_345


@pytest.mark.asyncio
async def test_premium_finalize_failure_propagates_and_releases(monkeypatch):
    from app.services.billable_calls import BillingSettlementError, billable_call

    class _FinalizeError(RuntimeError):
        pass

    spies = _patch_isolation_layer(
        monkeypatch,
        reserve_result=_FakeQuotaResult(allowed=True),
        finalize_exc=_FinalizeError("db finalize failed"),
    )
    user_id = uuid4()
    with pytest.raises(BillingSettlementError):
        async with billable_call(
            user_id=user_id,
            search_space_id=42,
            billing_tier="premium",
            base_model="openai/gpt-image-1",
            quota_reserve_micros_override=50_000,
            usage_type="image_generation",
        ) as acc:
            acc.add(
                model="openai/gpt-image-1",
                prompt_tokens=0,
                completion_tokens=0,
                total_tokens=0,
                cost_micros=40_000,
                call_kind="image_generation",
            )
    assert len(spies["reserve"]) == 1
    assert len(spies["release"]) == 1
    assert spies["record"] == []


@pytest.mark.asyncio
async def test_premium_audit_commit_hang_times_out_after_finalize(monkeypatch):
    from app.services.billable_calls import billable_call

    spies = _patch_isolation_layer(
        monkeypatch, reserve_result=_FakeQuotaResult(allowed=True)
    )
    user_id = uuid4()

    class _HangingCommitSession(_FakeSession):
        async def commit(self) -> None:
            await asyncio.sleep(60)

    @contextlib.asynccontextmanager
    async def _hanging_session_factory():
        s = _HangingCommitSession()
        _SESSIONS_USED.append(s)
        yield s

    async with billable_call(
        user_id=user_id,
        search_space_id=42,
        billing_tier="premium",
        base_model="openai/gpt-image-1",
        quota_reserve_micros_override=50_000,
        usage_type="image_generation",
        billable_session_factory=_hanging_session_factory,
        audit_timeout_seconds=0.01,
    ) as acc:
        acc.add(
            model="openai/gpt-image-1",
            prompt_tokens=0,
            completion_tokens=0,
            total_tokens=0,
            cost_micros=40_000,
            call_kind="image_generation",
        )
    assert len(spies["reserve"]) == 1
    assert len(spies["finalize"]) == 1
    assert len(spies["record"]) == 1
    assert spies["release"] == []


@pytest.mark.asyncio
async def test_free_audit_failure_is_best_effort(monkeypatch):
    from app.services.billable_calls import billable_call

    spies = _patch_isolation_layer(
        monkeypatch, reserve_result=_FakeQuotaResult(allowed=True)
    )

    async def _failing_record(_session, **_kwargs):
        raise RuntimeError("audit insert failed")

    monkeypatch.setattr(
        "app.services.billable_calls.record_token_usage",
        _failing_record,
        raising=False,
    )
    async with billable_call(
        user_id=uuid4(),
        search_space_id=42,
        billing_tier="free",
        base_model="openai/gpt-image-1",
        usage_type="image_generation",
        audit_timeout_seconds=0.01,
    ) as acc:
        acc.add(
            model="openai/gpt-image-1",
            prompt_tokens=0,
            completion_tokens=0,
            total_tokens=0,
            cost_micros=37_000,
            call_kind="image_generation",
        )
    assert spies["reserve"] == []
    assert spies["finalize"] == []


# ---------------------------------------------------------------------------
# Podcast / video-presentation usage_type coverage
# ---------------------------------------------------------------------------
@@ -387,7 +514,7 @@ async def test_free_podcast_path_audits_with_podcast_usage_type(monkeypatch):
    assert len(spies["record"]) == 1
    row = spies["record"][0]
    assert row["usage_type"] == "podcast_generation"
-    assert row["thread_id"] == 99
+    assert row["thread_id"] is None
    assert row["search_space_id"] == 42
    assert row["call_details"] == {"podcast_id": 7, "title": "Test Podcast"}


@@ -0,0 +1,177 @@
"""Defense-in-depth: image-gen call sites must not let an empty
``api_base`` fall through to LiteLLM's module-global ``litellm.api_base``.

The bug repro: an OpenRouter image-gen config ships ``api_base=""``.
The pre-fix call site in
``image_generation_routes._execute_image_generation`` did
``if cfg.get("api_base"): kwargs["api_base"] = cfg["api_base"]``, which
silently dropped the empty string. LiteLLM then fell back to
``litellm.api_base`` (commonly inherited from ``AZURE_OPENAI_ENDPOINT``),
and OpenRouter's ``image_generation/transformation`` appended
``/chat/completions`` to it, yielding a 404 ``Resource not found``.

This test pins the post-fix behaviour: with an empty ``api_base`` in
the config, the call site MUST set ``api_base`` to OpenRouter's public
URL instead of leaving it unset.
"""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

pytestmark = pytest.mark.unit


@pytest.mark.asyncio
async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
    """The global-config branch (``config_id < 0``) of
    ``_execute_image_generation`` must apply the resolver and pin
    ``api_base`` to OpenRouter when the config ships an empty string.
    """
    from app.routes import image_generation_routes

    cfg = {
        "id": -20_001,
        "name": "GPT Image 1 (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-image-1",
        "api_key": "sk-or-test",
        "api_base": "",  # the original bug shape
        "api_version": None,
        "litellm_params": {},
    }
    captured: dict = {}

    async def fake_aimage_generation(**kwargs):
        captured.update(kwargs)
        return MagicMock(model_dump=lambda: {"data": []}, _hidden_params={})

    image_gen = MagicMock()
    image_gen.image_generation_config_id = cfg["id"]
    image_gen.prompt = "test"
    image_gen.n = 1
    image_gen.quality = None
    image_gen.size = None
    image_gen.style = None
    image_gen.response_format = None
    image_gen.model = None
    search_space = MagicMock()
    search_space.image_generation_config_id = cfg["id"]
    session = MagicMock()

    with (
        patch.object(
            image_generation_routes,
            "_get_global_image_gen_config",
            return_value=cfg,
        ),
        patch.object(
            image_generation_routes,
            "aimage_generation",
            side_effect=fake_aimage_generation,
        ),
    ):
        await image_generation_routes._execute_image_generation(
            session=session, image_gen=image_gen, search_space=search_space
        )

    # The whole point of the fix: even with an empty ``api_base`` in the
    # config, we forward OpenRouter's public URL so the call doesn't
    # inherit an Azure endpoint.
    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-image-1"


@pytest.mark.asyncio
async def test_generate_image_tool_global_sets_api_base_when_config_empty():
    """Same defense at the agent tool entry point; both surfaces share
    the same OpenRouter config payloads."""
    from app.agents.new_chat.tools import generate_image as gi_module

    cfg = {
        "id": -20_001,
        "name": "GPT Image 1 (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-image-1",
        "api_key": "sk-or-test",
        "api_base": "",
        "api_version": None,
        "litellm_params": {},
    }
    captured: dict = {}

    async def fake_aimage_generation(**kwargs):
        captured.update(kwargs)
        response = MagicMock()
        response.model_dump.return_value = {
            "data": [{"url": "https://example.com/x.png"}]
        }
        response._hidden_params = {"model": "openrouter/openai/gpt-image-1"}
        return response

    search_space = MagicMock()
    search_space.id = 1
    search_space.image_generation_config_id = cfg["id"]
    session_cm = AsyncMock()
    session = AsyncMock()
    session_cm.__aenter__.return_value = session
    scalars = MagicMock()
    scalars.first.return_value = search_space
    exec_result = MagicMock()
    exec_result.scalars.return_value = scalars
    session.execute.return_value = exec_result
    session.add = MagicMock()
    session.commit = AsyncMock()
    session.refresh = AsyncMock()

    # ``refresh(db_image_gen)`` needs to populate ``id`` for the token URL
    # fallback.
    async def _refresh(obj):
        obj.id = 1

    session.refresh.side_effect = _refresh

    with (
        patch.object(gi_module, "shielded_async_session", return_value=session_cm),
        patch.object(gi_module, "_get_global_image_gen_config", return_value=cfg),
        patch.object(
            gi_module, "aimage_generation", side_effect=fake_aimage_generation
        ),
        patch.object(
            gi_module, "is_image_gen_auto_mode", side_effect=lambda cid: cid == 0
        ),
    ):
        tool = gi_module.create_generate_image_tool(
            search_space_id=1, db_session=MagicMock()
        )
        await tool.ainvoke({"prompt": "a cat", "n": 1})

    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-image-1"


def test_image_gen_router_deployment_sets_api_base_when_config_empty():
    """The Auto-mode router pool must also resolve ``api_base`` when an
    OpenRouter config ships an empty string. The deployment dict is fed
    straight to ``litellm.Router``, so a missing ``api_base`` would
    leak the same way as the direct call sites.
    """
    from app.services.image_gen_router_service import ImageGenRouterService

    deployment = ImageGenRouterService._config_to_deployment(
        {
            "model_name": "openai/gpt-image-1",
            "provider": "OPENROUTER",
            "api_key": "sk-or-test",
            "api_base": "",
        }
    )
    assert deployment is not None
    assert deployment["litellm_params"]["api_base"] == "https://openrouter.ai/api/v1"
    assert deployment["litellm_params"]["model"] == "openrouter/openai/gpt-image-1"


@@ -265,6 +265,10 @@ def test_generate_image_gen_configs_filters_by_image_output():
        assert c["billing_tier"] in {"free", "premium"}
        assert c["provider"] == "OPENROUTER"
        assert c[_OPENROUTER_DYNAMIC_MARKER] is True
        # Defense-in-depth: emit the OpenRouter base URL at source so a
        # downstream call site that forgets ``resolve_api_base`` still
        # doesn't 404 against an inherited Azure endpoint.
        assert c["api_base"] == "https://openrouter.ai/api/v1"


def test_generate_image_gen_configs_assigns_image_id_offset():
@@ -342,6 +346,10 @@ def test_generate_vision_llm_configs_filters_by_image_input_text_output():
    assert cfg["input_cost_per_token"] == pytest.approx(5e-6)
    assert cfg["output_cost_per_token"] == pytest.approx(15e-6)
    assert cfg[_OPENROUTER_DYNAMIC_MARKER] is True
    # Defense-in-depth: emit the OpenRouter base URL at source so a
    # downstream call site that forgets ``resolve_api_base`` still
    # doesn't inherit an Azure endpoint.
    assert cfg["api_base"] == "https://openrouter.ai/api/v1"


def test_generate_vision_llm_configs_drops_chat_only_filters():


@@ -0,0 +1,107 @@
"""Unit tests for the shared ``api_base`` resolver.

The cascade exists so vision and image-gen call sites can't silently
inherit ``litellm.api_base`` (commonly set by ``AZURE_OPENAI_ENDPOINT``)
when an OpenRouter / Groq / etc. config ships an empty string. See the
``provider_api_base`` module docstring for the original repro
(OpenRouter image-gen 404-ing against an Azure endpoint).
"""

from __future__ import annotations

import pytest

from app.services.provider_api_base import (
    PROVIDER_DEFAULT_API_BASE,
    PROVIDER_KEY_DEFAULT_API_BASE,
    resolve_api_base,
)

pytestmark = pytest.mark.unit


def test_config_value_wins_over_defaults():
    """A non-empty config value is always returned verbatim, even when
    the provider has a default; the operator gets the last word."""
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base="https://my-openrouter-mirror.example.com/v1",
    )
    assert result == "https://my-openrouter-mirror.example.com/v1"


def test_provider_key_default_when_config_missing():
    """``DEEPSEEK`` shares the ``openai`` LiteLLM prefix but has its own
    base URL; the provider-key map must take precedence over the prefix
    map so DeepSeek requests don't go to OpenAI."""
    result = resolve_api_base(
        provider="DEEPSEEK",
        provider_prefix="openai",
        config_api_base=None,
    )
    assert result == PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]


def test_provider_prefix_default_when_no_key_default():
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base=None,
    )
    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]


def test_unknown_provider_returns_none():
    """When neither map matches, we return ``None`` so the caller can let
    LiteLLM apply its own provider-integration default (Azure deployment
    URL, custom-provider URL, etc.)."""
    result = resolve_api_base(
        provider="SOMETHING_NEW",
        provider_prefix="something_new",
        config_api_base=None,
    )
    assert result is None


def test_empty_string_config_treated_as_missing():
    """The original bug: OpenRouter dynamic configs ship ``api_base=""``
    and downstream call sites use ``if cfg.get("api_base"):``. Empty
    strings are falsy in Python, but the cascade has to step in anyway."""
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base="",
    )
    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]


def test_whitespace_only_config_treated_as_missing():
    """A config value of ``" "`` is a configuration mistake; treat it
    as missing instead of forwarding whitespace to LiteLLM (which would
    almost certainly 404)."""
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base=" ",
    )
    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]


def test_provider_case_insensitive():
    """Some call sites pass the provider lowercase (DB enum value), others
    uppercase (YAML key). Both must resolve."""
    upper = resolve_api_base(
        provider="DEEPSEEK", provider_prefix="openai", config_api_base=None
    )
    lower = resolve_api_base(
        provider="deepseek", provider_prefix="openai", config_api_base=None
    )
    assert upper == lower == PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]


def test_all_inputs_none_returns_none():
    assert (
        resolve_api_base(provider=None, provider_prefix=None, config_api_base=None)
        is None
    )
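
Taken together, these tests pin the cascade's observable behaviour. A minimal sketch consistent with them follows; the map entries are illustrative placeholders (the DeepSeek URL in particular is an assumption), and the real module may differ in detail.

_KEY_DEFAULTS = {"DEEPSEEK": "https://api.deepseek.com"}           # by provider key
_PREFIX_DEFAULTS = {"openrouter": "https://openrouter.ai/api/v1"}  # by LiteLLM prefix


def resolve_api_base_sketch(*, provider, provider_prefix, config_api_base):
    # 1. A non-empty, non-whitespace config value wins verbatim.
    if config_api_base and config_api_base.strip():
        return config_api_base
    # 2. Provider-key default (case-insensitive) beats the prefix map,
    #    so DeepSeek doesn't inherit OpenAI's base via the shared prefix.
    if provider and provider.upper() in _KEY_DEFAULTS:
        return _KEY_DEFAULTS[provider.upper()]
    # 3. LiteLLM-prefix default.
    if provider_prefix and provider_prefix in _PREFIX_DEFAULTS:
        return _PREFIX_DEFAULTS[provider_prefix]
    # 4. Unknown: return None and let LiteLLM apply its own default.
    return None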


@@ -0,0 +1,244 @@
"""Unit tests for the shared chat-image capability resolver.

Two resolvers, two intents:

- ``derive_supports_image_input``: best-effort True for the catalog and
  selector. Default-allow on unknown / unmapped models. The streaming
  task safety net never sees this value directly.
- ``is_known_text_only_chat_model``: strict opt-out for the safety net.
  Returns True only when LiteLLM's model map *explicitly* sets
  ``supports_vision=False``. Anything else (missing key, exception,
  True) returns False so the request flows through to the provider.
"""

from __future__ import annotations

import pytest

from app.services.provider_capabilities import (
    derive_supports_image_input,
    is_known_text_only_chat_model,
)

pytestmark = pytest.mark.unit


# ---------------------------------------------------------------------------
# derive_supports_image_input — OpenRouter modalities path (authoritative)
# ---------------------------------------------------------------------------
def test_or_modalities_with_image_returns_true():
    assert (
        derive_supports_image_input(
            provider="OPENROUTER",
            model_name="openai/gpt-4o",
            openrouter_input_modalities=["text", "image"],
        )
        is True
    )


def test_or_modalities_text_only_returns_false():
    assert (
        derive_supports_image_input(
            provider="OPENROUTER",
            model_name="deepseek/deepseek-v3.2-exp",
            openrouter_input_modalities=["text"],
        )
        is False
    )


def test_or_modalities_empty_list_returns_false():
    """OpenRouter explicitly publishing an empty modality list is a
    definitive 'no inputs at all' signal; treat it as False rather than
    falling back to LiteLLM."""
    assert (
        derive_supports_image_input(
            provider="OPENROUTER",
            model_name="weird/empty-modalities",
            openrouter_input_modalities=[],
        )
        is False
    )


def test_or_modalities_none_falls_through_to_litellm():
    """``None`` (missing key) is *not* a definitive signal — fall through
    to LiteLLM. Using ``openai/gpt-4o``, which is in LiteLLM's map."""
    assert (
        derive_supports_image_input(
            provider="OPENAI",
            model_name="gpt-4o",
            openrouter_input_modalities=None,
        )
        is True
    )


# ---------------------------------------------------------------------------
# derive_supports_image_input — LiteLLM model-map path
# ---------------------------------------------------------------------------
def test_litellm_known_vision_model_returns_true():
    assert (
        derive_supports_image_input(
            provider="OPENAI",
            model_name="gpt-4o",
        )
        is True
    )


def test_litellm_base_model_wins_over_model_name():
    """Azure-style entries pass model_name=deployment_id and put the
    canonical sku in litellm_params.base_model. The resolver must
    consult base_model first, or the deployment id (which LiteLLM
    doesn't know) would shadow the real capability."""
    assert (
        derive_supports_image_input(
            provider="AZURE_OPENAI",
            model_name="my-azure-deployment-id",
            base_model="gpt-4o",
        )
        is True
    )


def test_litellm_unknown_model_default_allows():
    """Default-allow on unknown — the safety net is the actual block."""
    assert (
        derive_supports_image_input(
            provider="CUSTOM",
            model_name="brand-new-model-x9-unmapped",
            custom_provider="brand_new_proxy",
        )
        is True
    )


def test_litellm_known_text_only_returns_false():
    """A model that LiteLLM explicitly knows is text-only resolves to
    False even via the catalog resolver. ``deepseek-chat`` (the
    DeepSeek-V3 chat sku) is in the map without supports_vision, and
    LiteLLM's ``supports_vision`` returns False for it."""
    # Sanity: confirm the helper's negative path. We use a small model
    # known not to support vision per the map.
    result = derive_supports_image_input(
        provider="DEEPSEEK",
        model_name="deepseek-chat",
    )
    # We accept either False (LiteLLM said an explicit no) or True
    # (default-allow if the entry isn't mapped on this version) — the
    # invariant is that the resolver never *raises* on a known-text-only
    # provider/model. The behaviour-binding assertion lives in
    # ``test_is_known_text_only_returns_true_on_explicit_false`` below.
    assert isinstance(result, bool)


# ---------------------------------------------------------------------------
# is_known_text_only_chat_model — strict opt-out semantics
# ---------------------------------------------------------------------------
def test_is_known_text_only_returns_false_for_vision_model():
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="gpt-4o",
        )
        is False
    )


def test_is_known_text_only_returns_false_for_unknown_model():
    """Strict opt-out: missing from the map ≠ text-only. The safety net
    must NOT fire for an unmapped model; that's the regression we're
    fixing."""
    assert (
        is_known_text_only_chat_model(
            provider="CUSTOM",
            model_name="brand-new-model-x9-unmapped",
            custom_provider="brand_new_proxy",
        )
        is False
    )


def test_is_known_text_only_returns_false_when_lookup_raises(monkeypatch):
    """LiteLLM's ``get_model_info`` raises freely on parse errors. The
    helper swallows the exception and returns False so the safety net
    doesn't fire on a transient lookup failure."""
    import app.services.provider_capabilities as pc

    def _raise(**_kwargs):
        raise ValueError("intentional test failure")

    monkeypatch.setattr(pc.litellm, "get_model_info", _raise)
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="gpt-4o",
        )
        is False
    )


def test_is_known_text_only_returns_true_on_explicit_false(monkeypatch):
    """Stub LiteLLM's ``get_model_info`` to return an explicit False so
    we exercise the opt-out path deterministically. Using a stub keeps
    the test stable across LiteLLM map updates."""
    import app.services.provider_capabilities as pc

    def _info(**_kwargs):
        return {"supports_vision": False, "max_input_tokens": 8192}

    monkeypatch.setattr(pc.litellm, "get_model_info", _info)
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="any-model",
        )
        is True
    )


def test_is_known_text_only_returns_false_on_supports_vision_true(monkeypatch):
    import app.services.provider_capabilities as pc

    def _info(**_kwargs):
        return {"supports_vision": True}

    monkeypatch.setattr(pc.litellm, "get_model_info", _info)
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="any-model",
        )
        is False
    )


def test_is_known_text_only_returns_false_on_missing_key(monkeypatch):
    """A model entry without ``supports_vision`` at all is treated as
    'unknown'; strict opt-out means False."""
    import app.services.provider_capabilities as pc

    def _info(**_kwargs):
        return {"max_input_tokens": 8192}  # no supports_vision

    monkeypatch.setattr(pc.litellm, "get_model_info", _info)
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="any-model",
        )
        is False
    )
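
The stubbed tests above nail the opt-out semantics down to a few lines. A sketch consistent with them, using LiteLLM's real ``litellm.get_model_info`` lookup; ``is_known_text_only_sketch`` is a hypothetical stand-in for the production helper, which also threads provider prefixes and base_model.

import litellm


def is_known_text_only_sketch(*, model_name: str) -> bool:
    """True only when the map *explicitly* says supports_vision=False."""
    try:
        info = litellm.get_model_info(model=model_name)
    except Exception:
        # Lookup failures must never block a request.
        return False
    # A missing key means "unknown", and unknown never fires the net.
    return info.get("supports_vision") is False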


@@ -0,0 +1,281 @@
"""Unit tests for the chat-catalog ``supports_image_input`` capability flag.

Capability is sourced from two places, in order of preference:

1. ``architecture.input_modalities`` for dynamic OpenRouter chat configs
   (authoritative: OpenRouter publishes per-model modalities directly).
2. LiteLLM's authoritative model map (``litellm.supports_vision``) for
   YAML / BYOK configs that don't carry an explicit operator override.

The catalog default is *True* (conservative-allow): an unknown / unmapped
model is not pre-judged. The streaming-task safety net
(``is_known_text_only_chat_model``) is the only place a False actually
blocks a request, and it requires LiteLLM to *explicitly* mark the model
as text-only.
"""

from __future__ import annotations

import pytest

from app.services.openrouter_integration_service import (
    _OPENROUTER_DYNAMIC_MARKER,
    _generate_configs,
    _supports_image_input,
)

pytestmark = pytest.mark.unit

_SETTINGS_BASE: dict = {
    "api_key": "sk-or-test",
    "id_offset": -10_000,
    "rpm": 200,
    "tpm": 1_000_000,
    "free_rpm": 20,
    "free_tpm": 100_000,
    "anonymous_enabled_paid": False,
    "anonymous_enabled_free": True,
    "quota_reserve_tokens": 4000,
}


# ---------------------------------------------------------------------------
# _supports_image_input helper (OpenRouter modalities)
# ---------------------------------------------------------------------------
def test_supports_image_input_true_for_multimodal():
    assert (
        _supports_image_input(
            {
                "id": "openai/gpt-4o",
                "architecture": {
                    "input_modalities": ["text", "image"],
                    "output_modalities": ["text"],
                },
            }
        )
        is True
    )


def test_supports_image_input_false_for_text_only():
    """The exact failure mode the safety net guards against — DeepSeek V3
    is a text-in/text-out model and would 404 if forwarded image_url."""
    assert (
        _supports_image_input(
            {
                "id": "deepseek/deepseek-v3.2-exp",
                "architecture": {
                    "input_modalities": ["text"],
                    "output_modalities": ["text"],
                },
            }
        )
        is False
    )


def test_supports_image_input_false_when_modalities_missing():
    """Defensive: a missing architecture is treated as text-only at the
    OpenRouter helper level. The wider catalog resolver
    (``derive_supports_image_input``) only consults modalities when they
    are non-empty; otherwise it falls back to LiteLLM."""
    assert _supports_image_input({"id": "weird/model"}) is False
    assert _supports_image_input({"id": "weird/model", "architecture": {}}) is False
    assert (
        _supports_image_input(
            {"id": "weird/model", "architecture": {"input_modalities": None}}
        )
        is False
    )


# ---------------------------------------------------------------------------
# _generate_configs threads the flag onto every emitted chat config
# ---------------------------------------------------------------------------
def test_generate_configs_emits_supports_image_input():
    raw = [
        {
            "id": "openai/gpt-4o",
            "architecture": {
                "input_modalities": ["text", "image"],
                "output_modalities": ["text"],
            },
            "supported_parameters": ["tools"],
            "context_length": 200_000,
            "pricing": {"prompt": "0.000005", "completion": "0.000015"},
        },
        {
            "id": "deepseek/deepseek-v3.2-exp",
            "architecture": {
                "input_modalities": ["text"],
                "output_modalities": ["text"],
            },
            "supported_parameters": ["tools"],
            "context_length": 200_000,
            "pricing": {"prompt": "0.000003", "completion": "0.000015"},
        },
    ]
    cfgs = _generate_configs(raw, dict(_SETTINGS_BASE))
    by_model = {c["model_name"]: c for c in cfgs}

    gpt = by_model["openai/gpt-4o"]
    assert gpt["supports_image_input"] is True
    assert gpt[_OPENROUTER_DYNAMIC_MARKER] is True

    deepseek = by_model["deepseek/deepseek-v3.2-exp"]
    assert deepseek["supports_image_input"] is False
    assert deepseek[_OPENROUTER_DYNAMIC_MARKER] is True


# ---------------------------------------------------------------------------
# YAML loader: defer to derive_supports_image_input on unannotated entries
# ---------------------------------------------------------------------------
def test_yaml_loader_resolves_unannotated_vision_model_to_true(tmp_path, monkeypatch):
    """The regression case: an Azure GPT-5.x YAML entry without a
    ``supports_image_input`` override should resolve to True via LiteLLM's
    model map (which says ``supports_vision: true``). Previously this
    defaulted to False, blocking every image turn for vision-capable
    YAML configs."""
    yaml_dir = tmp_path / "app" / "config"
    yaml_dir.mkdir(parents=True)
    (yaml_dir / "global_llm_config.yaml").write_text(
        """
global_llm_configs:
  - id: -2
    name: Azure GPT-4o
    provider: AZURE_OPENAI
    model_name: gpt-4o
    api_key: sk-test
""",
        encoding="utf-8",
    )
    from app import config as config_module

    monkeypatch.setattr(config_module, "BASE_DIR", tmp_path)
    configs = config_module.load_global_llm_configs()
    assert len(configs) == 1
    assert configs[0]["supports_image_input"] is True


def test_yaml_loader_respects_explicit_supports_image_input(tmp_path, monkeypatch):
    yaml_dir = tmp_path / "app" / "config"
    yaml_dir.mkdir(parents=True)
    (yaml_dir / "global_llm_config.yaml").write_text(
        """
global_llm_configs:
  - id: -1
    name: GPT-4o
    provider: OPENAI
    model_name: gpt-4o
    api_key: sk-test
    supports_image_input: false
""",
        encoding="utf-8",
    )
    from app import config as config_module

    monkeypatch.setattr(config_module, "BASE_DIR", tmp_path)
    configs = config_module.load_global_llm_configs()
    assert len(configs) == 1
    # Operator override always wins, even against LiteLLM's True.
    assert configs[0]["supports_image_input"] is False


def test_yaml_loader_unknown_model_default_allows(tmp_path, monkeypatch):
    """Unknown / unmapped model in YAML: default-allow. The streaming
    safety net (which requires an explicit False from LiteLLM) is the
    only place a real block happens, so we don't lock the user out of
    a freshly added third-party entry the catalog can't introspect."""
    yaml_dir = tmp_path / "app" / "config"
    yaml_dir.mkdir(parents=True)
    (yaml_dir / "global_llm_config.yaml").write_text(
        """
global_llm_configs:
  - id: -1
    name: Some Brand New Model
    provider: CUSTOM
    custom_provider: brand_new_proxy
    model_name: brand-new-model-x9
    api_key: sk-test
""",
        encoding="utf-8",
    )
    from app import config as config_module

    monkeypatch.setattr(config_module, "BASE_DIR", tmp_path)
    configs = config_module.load_global_llm_configs()
    assert len(configs) == 1
    assert configs[0]["supports_image_input"] is True


# ---------------------------------------------------------------------------
# AgentConfig threads the flag through both YAML and Auto / BYOK
# ---------------------------------------------------------------------------
def test_agent_config_from_yaml_explicit_overrides_resolver():
    from app.agents.new_chat.llm_config import AgentConfig

    cfg_text_only = AgentConfig.from_yaml_config(
        {
            "id": -1,
            "name": "Text Only Override",
            "provider": "openai",
            "model_name": "gpt-4o",  # Capable per LiteLLM, but operator says no.
            "api_key": "sk-test",
            "supports_image_input": False,
        }
    )
    cfg_explicit_vision = AgentConfig.from_yaml_config(
        {
            "id": -2,
            "name": "GPT-4o",
            "provider": "openai",
            "model_name": "gpt-4o",
            "api_key": "sk-test",
            "supports_image_input": True,
        }
    )
    assert cfg_text_only.supports_image_input is False
    assert cfg_explicit_vision.supports_image_input is True


def test_agent_config_from_yaml_unannotated_uses_resolver():
    """Without an explicit YAML key, AgentConfig defers to the catalog
    resolver for ``gpt-4o``; LiteLLM's map says supports_vision=True."""
    from app.agents.new_chat.llm_config import AgentConfig

    cfg = AgentConfig.from_yaml_config(
        {
            "id": -1,
            "name": "GPT-4o (no override)",
            "provider": "openai",
            "model_name": "gpt-4o",
            "api_key": "sk-test",
        }
    )
    assert cfg.supports_image_input is True


def test_agent_config_auto_mode_supports_image_input():
    """Auto routes across the pool. We optimistically allow image input
    so users can keep their selection on Auto with a vision-capable
    deployment somewhere in the pool. The router's own ``allowed_fails``
    handles non-vision deployments via fallback."""
    from app.agents.new_chat.llm_config import AgentConfig

    auto = AgentConfig.from_auto_mode()
    assert auto.supports_image_input is True
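
The modalities checks in the first block reduce to a short helper. A stand-in consistent with those assertions (not the literal source of ``_supports_image_input``):

def _supports_image_input_sketch(model: dict) -> bool:
    # Missing architecture, empty dict, or a None modality list all
    # collapse to [] and therefore to False, matching the tests above.
    modalities = (model.get("architecture") or {}).get("input_modalities") or []
    return "image" in modalities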


@@ -0,0 +1,89 @@
"""Defense-in-depth: vision-LLM resolution must not leak ``api_base``
defaults from ``litellm.api_base`` either.

Vision shares the same shape as image-gen: global YAML / OpenRouter
dynamic configs ship ``api_base=""``, and the pre-fix ``get_vision_llm``
call sites would silently drop the empty string and inherit
``AZURE_OPENAI_ENDPOINT``. ``ChatLiteLLM(...)`` doesn't 404 on
construction, so we test the kwargs we hand to it instead.
"""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

pytestmark = pytest.mark.unit


@pytest.mark.asyncio
async def test_get_vision_llm_global_openrouter_sets_api_base():
    """Global negative-ID branch: an OpenRouter vision config with
    ``api_base=""`` must end up calling ``SanitizedChatLiteLLM`` with
    ``api_base="https://openrouter.ai/api/v1"``; never an empty string,
    never silently absent."""
    from app.services import llm_service

    cfg = {
        "id": -30_001,
        "name": "GPT-4o Vision (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-4o",
        "api_key": "sk-or-test",
        "api_base": "",
        "api_version": None,
        "litellm_params": {},
        "billing_tier": "free",
    }
    search_space = MagicMock()
    search_space.id = 1
    search_space.user_id = "user-x"
    search_space.vision_llm_config_id = cfg["id"]
    session = AsyncMock()
    scalars = MagicMock()
    scalars.first.return_value = search_space
    result = MagicMock()
    result.scalars.return_value = scalars
    session.execute.return_value = result
    captured: dict = {}

    class FakeSanitized:
        def __init__(self, **kwargs):
            captured.update(kwargs)

    with (
        patch(
            "app.services.vision_llm_router_service.get_global_vision_llm_config",
            return_value=cfg,
        ),
        patch(
            "app.agents.new_chat.llm_config.SanitizedChatLiteLLM",
            new=FakeSanitized,
        ),
    ):
        await llm_service.get_vision_llm(session=session, search_space_id=1)

    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-4o"


def test_vision_router_deployment_sets_api_base_when_config_empty():
    """Auto-mode vision router: deployments are fed to ``litellm.Router``,
    so the resolver has to apply at deployment construction time too."""
    from app.services.vision_llm_router_service import VisionLLMRouterService

    deployment = VisionLLMRouterService._config_to_deployment(
        {
            "model_name": "openai/gpt-4o",
            "provider": "OPENROUTER",
            "api_key": "sk-or-test",
            "api_base": "",
        }
    )
    assert deployment is not None
    assert deployment["litellm_params"]["api_base"] == "https://openrouter.ai/api/v1"
    assert deployment["litellm_params"]["model"] == "openrouter/openai/gpt-4o"