mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-04 05:12:38 +02:00
feat: fixed provider-specific vision/image errors and podcast/video streaming
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions
parent
ae9d36d77f
commit
47b2994ec7
54 changed files with 4469 additions and 563 deletions
@@ -0,0 +1,286 @@
"""Image-aware extension of the Auto-pin resolver.

When the current chat turn carries an ``image_url`` block, the pin
resolver must:

1. Filter the candidate pool to vision-capable cfgs so a freshly
   selected pin can never be text-only.
2. Treat any existing pin whose capability is False as invalid (force
   re-pin), even when it would otherwise be reused as the thread's
   stable model.
3. Raise ``ValueError`` (mapped to the friendly
   ``MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT`` SSE error in the streaming
   task) when no vision-capable cfg is available — instead of silently
   pinning text-only and 404-ing at the provider.
"""

from __future__ import annotations

from dataclasses import dataclass
from types import SimpleNamespace

import pytest

from app.services.auto_model_pin_service import (
    clear_healthy,
    clear_runtime_cooldown,
    resolve_or_get_pinned_llm_config_id,
)

pytestmark = pytest.mark.unit


@pytest.fixture(autouse=True)
def _reset_caches():
    clear_runtime_cooldown()
    clear_healthy()
    yield
    clear_runtime_cooldown()
    clear_healthy()


@dataclass
class _FakeQuotaResult:
    allowed: bool


class _FakeExecResult:
    def __init__(self, thread):
        self._thread = thread

    def unique(self):
        return self

    def scalar_one_or_none(self):
        return self._thread


class _FakeSession:
    def __init__(self, thread):
        self.thread = thread
        self.commit_count = 0

    async def execute(self, _stmt):
        return _FakeExecResult(self.thread)

    async def commit(self):
        self.commit_count += 1


def _thread(*, pinned: int | None = None):
    return SimpleNamespace(id=1, search_space_id=10, pinned_llm_config_id=pinned)


def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
    return {
        "id": id_,
        "provider": "OPENAI",
        "model_name": f"vision-{id_}",
        "api_key": "k",
        "billing_tier": tier,
        "supports_image_input": True,
        "auto_pin_tier": "A",
        "quality_score": quality,
    }


def _text_only_cfg(id_: int, *, tier: str = "free", quality: int = 90) -> dict:
    return {
        "id": id_,
        "provider": "OPENAI",
        "model_name": f"text-{id_}",
        "api_key": "k",
        "billing_tier": tier,
        # Higher quality than the vision cfgs — so a bug that ignores
        # the image flag would surface as the resolver picking this one.
        "supports_image_input": False,
        "auto_pin_tier": "A",
        "quality_score": quality,
    }


async def _premium_allowed(*_args, **_kwargs):
    return _FakeQuotaResult(allowed=True)


@pytest.mark.asyncio
async def test_image_turn_filters_out_text_only_candidates(monkeypatch):
    from app.config import config

    session = _FakeSession(_thread())
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )

    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )

    assert result.resolved_llm_config_id == -2
    # The thread should be pinned to the vision cfg even though the
    # text-only cfg has a higher quality score.
    assert session.thread.pinned_llm_config_id == -2


@pytest.mark.asyncio
async def test_image_turn_force_repins_stale_text_only_pin(monkeypatch):
    """An existing text-only pin must be invalidated when the next turn
    requires image input. The non-image path would happily reuse it."""
    from app.config import config

    session = _FakeSession(_thread(pinned=-1))
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )

    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )

    assert result.resolved_llm_config_id == -2
    assert result.from_existing_pin is False
    assert session.thread.pinned_llm_config_id == -2


@pytest.mark.asyncio
async def test_image_turn_reuses_existing_vision_pin(monkeypatch):
    """If the thread is already pinned to a vision-capable cfg, reuse it
    — same as the non-image path. Image-aware filtering must not force
    spurious re-pins."""
    from app.config import config

    session = _FakeSession(_thread(pinned=-2))
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _vision_cfg(-2), _vision_cfg(-3, quality=70)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )

    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )

    assert result.resolved_llm_config_id == -2
    assert result.from_existing_pin is True


@pytest.mark.asyncio
async def test_image_turn_with_no_vision_candidates_raises(monkeypatch):
    """The friendly-error path: no vision-capable cfg in the pool -> raise
    ``ValueError`` whose message contains ``vision-capable`` so the
    streaming task can map it to ``MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT``."""
    from app.config import config

    session = _FakeSession(_thread())
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1), _text_only_cfg(-2)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )

    with pytest.raises(ValueError, match="vision-capable"):
        await resolve_or_get_pinned_llm_config_id(
            session,
            thread_id=1,
            search_space_id=10,
            user_id=None,
            selected_llm_config_id=0,
            requires_image_input=True,
        )


@pytest.mark.asyncio
async def test_non_image_turn_keeps_text_only_in_pool(monkeypatch):
    """Regression guard: the image flag must default to False and not affect
    a normal text-only turn — text-only cfgs remain selectable."""
    from app.config import config

    session = _FakeSession(_thread())
    monkeypatch.setattr(
        config,
        "GLOBAL_LLM_CONFIGS",
        [_text_only_cfg(-1)],
    )
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )

    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
    )
    assert result.resolved_llm_config_id == -1


@pytest.mark.asyncio
async def test_image_turn_unannotated_cfg_resolves_via_helper(monkeypatch):
    """A YAML cfg that omits ``supports_image_input`` falls through to
    ``derive_supports_image_input`` (LiteLLM-driven). For ``gpt-4o``
    that returns True, so the cfg should be a valid candidate."""
    from app.config import config

    session = _FakeSession(_thread())
    cfg_unannotated_vision = {
        "id": -2,
        "provider": "OPENAI",
        "model_name": "gpt-4o",  # known vision model in LiteLLM map
        "api_key": "k",
        "billing_tier": "free",
        "auto_pin_tier": "A",
        "quality_score": 80,
        # NOTE: no supports_image_input key
    }
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [cfg_unannotated_vision])
    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _premium_allowed,
    )

    result = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id=None,
        selected_llm_config_id=0,
        requires_image_input=True,
    )
    assert result.resolved_llm_config_id == -2
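For orientation, a minimal sketch of the filtering contract these tests pin down. The function name `_filter_for_image_turn` and the blanket `True` default are illustrative only; per the docstring above, the real resolver routes unannotated cfgs through ``derive_supports_image_input`` rather than defaulting:

def _filter_for_image_turn(candidates: list[dict], requires_image_input: bool) -> list[dict]:
    """Drop text-only cfgs when the current turn carries an image_url block."""
    if not requires_image_input:
        return candidates  # text turns keep the full pool (regression guard above)
    pool = [c for c in candidates if c.get("supports_image_input", True)]
    if not pool:
        # The streaming task maps this message to MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT.
        raise ValueError("No vision-capable model configuration is available")
    return pool

The same predicate doubles as the pin-validity check: an existing pin whose cfg fails it is treated as stale and re-resolved, which is exactly what the force-repin test asserts.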
@@ -15,6 +15,7 @@ vision LLM extraction:
from __future__ import annotations

import asyncio
import contextlib
from typing import Any
from uuid import uuid4

@@ -57,6 +58,9 @@ class _FakeSession:
    async def commit(self) -> None:
        self.committed = True

    async def rollback(self) -> None:
        pass

    async def close(self) -> None:
        pass

@@ -71,7 +75,9 @@ async def _fake_shielded_session():
_SESSIONS_USED: list[_FakeSession] = []


-def _patch_isolation_layer(monkeypatch, *, reserve_result, finalize_result=None):
+def _patch_isolation_layer(
+    monkeypatch, *, reserve_result, finalize_result=None, finalize_exc=None
+):
    """Wire fake reserve/finalize/release/session helpers."""
    _SESSIONS_USED.clear()
    reserve_calls: list[dict[str, Any]] = []

@@ -91,6 +97,8 @@ def _patch_isolation_layer(monkeypatch, *, reserve_result, finalize_result=None)
    async def _fake_finalize(
        *, db_session, user_id, request_id, actual_micros, reserved_micros
    ):
+        if finalize_exc is not None:
+            raise finalize_exc
        finalize_calls.append(
            {
                "user_id": user_id,

@@ -343,6 +351,125 @@ async def test_premium_uses_estimator_when_no_micros_override(monkeypatch):
    assert spies["reserve"][0]["reserve_micros"] == 12_345


@pytest.mark.asyncio
async def test_premium_finalize_failure_propagates_and_releases(monkeypatch):
    from app.services.billable_calls import BillingSettlementError, billable_call

    class _FinalizeError(RuntimeError):
        pass

    spies = _patch_isolation_layer(
        monkeypatch,
        reserve_result=_FakeQuotaResult(allowed=True),
        finalize_exc=_FinalizeError("db finalize failed"),
    )
    user_id = uuid4()

    with pytest.raises(BillingSettlementError):
        async with billable_call(
            user_id=user_id,
            search_space_id=42,
            billing_tier="premium",
            base_model="openai/gpt-image-1",
            quota_reserve_micros_override=50_000,
            usage_type="image_generation",
        ) as acc:
            acc.add(
                model="openai/gpt-image-1",
                prompt_tokens=0,
                completion_tokens=0,
                total_tokens=0,
                cost_micros=40_000,
                call_kind="image_generation",
            )

    assert len(spies["reserve"]) == 1
    assert len(spies["release"]) == 1
    assert spies["record"] == []


@pytest.mark.asyncio
async def test_premium_audit_commit_hang_times_out_after_finalize(monkeypatch):
    from app.services.billable_calls import billable_call

    spies = _patch_isolation_layer(
        monkeypatch, reserve_result=_FakeQuotaResult(allowed=True)
    )
    user_id = uuid4()

    class _HangingCommitSession(_FakeSession):
        async def commit(self) -> None:
            await asyncio.sleep(60)

    @contextlib.asynccontextmanager
    async def _hanging_session_factory():
        s = _HangingCommitSession()
        _SESSIONS_USED.append(s)
        yield s

    async with billable_call(
        user_id=user_id,
        search_space_id=42,
        billing_tier="premium",
        base_model="openai/gpt-image-1",
        quota_reserve_micros_override=50_000,
        usage_type="image_generation",
        billable_session_factory=_hanging_session_factory,
        audit_timeout_seconds=0.01,
    ) as acc:
        acc.add(
            model="openai/gpt-image-1",
            prompt_tokens=0,
            completion_tokens=0,
            total_tokens=0,
            cost_micros=40_000,
            call_kind="image_generation",
        )

    assert len(spies["reserve"]) == 1
    assert len(spies["finalize"]) == 1
    assert len(spies["record"]) == 1
    assert spies["release"] == []


@pytest.mark.asyncio
async def test_free_audit_failure_is_best_effort(monkeypatch):
    from app.services.billable_calls import billable_call

    spies = _patch_isolation_layer(
        monkeypatch, reserve_result=_FakeQuotaResult(allowed=True)
    )

    async def _failing_record(_session, **_kwargs):
        raise RuntimeError("audit insert failed")

    monkeypatch.setattr(
        "app.services.billable_calls.record_token_usage",
        _failing_record,
        raising=False,
    )

    async with billable_call(
        user_id=uuid4(),
        search_space_id=42,
        billing_tier="free",
        base_model="openai/gpt-image-1",
        usage_type="image_generation",
        audit_timeout_seconds=0.01,
    ) as acc:
        acc.add(
            model="openai/gpt-image-1",
            prompt_tokens=0,
            completion_tokens=0,
            total_tokens=0,
            cost_micros=37_000,
            call_kind="image_generation",
        )

    assert spies["reserve"] == []
    assert spies["finalize"] == []


# ---------------------------------------------------------------------------
# Podcast / video-presentation usage_type coverage
# ---------------------------------------------------------------------------

@@ -387,7 +514,7 @@ async def test_free_podcast_path_audits_with_podcast_usage_type(monkeypatch):
    assert len(spies["record"]) == 1
    row = spies["record"][0]
    assert row["usage_type"] == "podcast_generation"
-    assert row["thread_id"] == 99
+    assert row["thread_id"] is None
    assert row["search_space_id"] == 42
    assert row["call_details"] == {"podcast_id": 7, "title": "Test Podcast"}
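The two premium-path tests above encode a settlement order that is easy to misread from the mocks alone. A rough sketch of that order, with hypothetical helper callables; only ``BillingSettlementError``, the release-on-finalize-failure behaviour, and the audit timeout come from this diff:

import asyncio
from contextlib import asynccontextmanager


class BillingSettlementError(RuntimeError):
    """Raised when the quota finalization write fails (sketch)."""


@asynccontextmanager
async def billable_call_sketch(*, reserve, finalize, release, audit,
                               audit_timeout_seconds=0.5):
    await reserve()                      # quota reserved before any provider call
    rows: list[dict] = []                # usage the caller accumulates via acc.add(...)
    try:
        yield rows
    except Exception:
        await release()                  # caller failed: hand the reservation back
        raise
    try:
        await finalize(rows)             # settle actuals against the reservation
    except Exception as exc:
        await release()                  # finalize failed: release, then surface
        raise BillingSettlementError("finalize failed") from exc
    try:
        # Best-effort audit: a hanging commit times out instead of stalling
        # the response (cf. audit_timeout_seconds=0.01 in the tests above).
        await asyncio.wait_for(audit(rows), timeout=audit_timeout_seconds)
    except Exception:
        pass

Read against the assertions: finalize failure releases the reservation and records nothing, while an audit hang still leaves reserve, finalize, and record each called exactly once with no release.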
@@ -0,0 +1,177 @@
"""Defense-in-depth: image-gen call sites must not let an empty
``api_base`` fall through to LiteLLM's module-global ``litellm.api_base``.

The bug repro: an OpenRouter image-gen config ships
``api_base=""``. The pre-fix call site in
``image_generation_routes._execute_image_generation`` did
``if cfg.get("api_base"): kwargs["api_base"] = cfg["api_base"]`` which
silently dropped the empty string. LiteLLM then fell back to
``litellm.api_base`` (commonly inherited from ``AZURE_OPENAI_ENDPOINT``)
and OpenRouter's ``image_generation/transformation`` appended
``/chat/completions`` to it → 404 ``Resource not found``.

This test pins the post-fix behaviour: with an empty ``api_base`` in
the config, the call site MUST set ``api_base`` to OpenRouter's public
URL instead of leaving it unset.
"""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

pytestmark = pytest.mark.unit


@pytest.mark.asyncio
async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
    """The global-config branch (``config_id < 0``) of
    ``_execute_image_generation`` must apply the resolver and pin
    ``api_base`` to OpenRouter when the config ships an empty string.
    """
    from app.routes import image_generation_routes

    cfg = {
        "id": -20_001,
        "name": "GPT Image 1 (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-image-1",
        "api_key": "sk-or-test",
        "api_base": "",  # the original bug shape
        "api_version": None,
        "litellm_params": {},
    }

    captured: dict = {}

    async def fake_aimage_generation(**kwargs):
        captured.update(kwargs)
        return MagicMock(model_dump=lambda: {"data": []}, _hidden_params={})

    image_gen = MagicMock()
    image_gen.image_generation_config_id = cfg["id"]
    image_gen.prompt = "test"
    image_gen.n = 1
    image_gen.quality = None
    image_gen.size = None
    image_gen.style = None
    image_gen.response_format = None
    image_gen.model = None

    search_space = MagicMock()
    search_space.image_generation_config_id = cfg["id"]
    session = MagicMock()

    with (
        patch.object(
            image_generation_routes,
            "_get_global_image_gen_config",
            return_value=cfg,
        ),
        patch.object(
            image_generation_routes,
            "aimage_generation",
            side_effect=fake_aimage_generation,
        ),
    ):
        await image_generation_routes._execute_image_generation(
            session=session, image_gen=image_gen, search_space=search_space
        )

    # The whole point of the fix: even with empty ``api_base`` in the
    # config, we forward OpenRouter's public URL so the call doesn't
    # inherit an Azure endpoint.
    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-image-1"


@pytest.mark.asyncio
async def test_generate_image_tool_global_sets_api_base_when_config_empty():
    """Same defense at the agent tool entry point — both surfaces share
    the same OpenRouter config payloads."""
    from app.agents.new_chat.tools import generate_image as gi_module

    cfg = {
        "id": -20_001,
        "name": "GPT Image 1 (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-image-1",
        "api_key": "sk-or-test",
        "api_base": "",
        "api_version": None,
        "litellm_params": {},
    }

    captured: dict = {}

    async def fake_aimage_generation(**kwargs):
        captured.update(kwargs)
        response = MagicMock()
        response.model_dump.return_value = {
            "data": [{"url": "https://example.com/x.png"}]
        }
        response._hidden_params = {"model": "openrouter/openai/gpt-image-1"}
        return response

    search_space = MagicMock()
    search_space.id = 1
    search_space.image_generation_config_id = cfg["id"]

    session_cm = AsyncMock()
    session = AsyncMock()
    session_cm.__aenter__.return_value = session

    scalars = MagicMock()
    scalars.first.return_value = search_space
    exec_result = MagicMock()
    exec_result.scalars.return_value = scalars
    session.execute.return_value = exec_result
    session.add = MagicMock()
    session.commit = AsyncMock()
    session.refresh = AsyncMock()

    # ``refresh(db_image_gen)`` needs to populate ``id`` for token URL fallback.
    async def _refresh(obj):
        obj.id = 1

    session.refresh.side_effect = _refresh

    with (
        patch.object(gi_module, "shielded_async_session", return_value=session_cm),
        patch.object(gi_module, "_get_global_image_gen_config", return_value=cfg),
        patch.object(
            gi_module, "aimage_generation", side_effect=fake_aimage_generation
        ),
        patch.object(
            gi_module, "is_image_gen_auto_mode", side_effect=lambda cid: cid == 0
        ),
    ):
        tool = gi_module.create_generate_image_tool(
            search_space_id=1, db_session=MagicMock()
        )
        await tool.ainvoke({"prompt": "a cat", "n": 1})

    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-image-1"


def test_image_gen_router_deployment_sets_api_base_when_config_empty():
    """The Auto-mode router pool must also resolve ``api_base`` when an
    OpenRouter config ships an empty string. The deployment dict is fed
    straight to ``litellm.Router``, so a missing ``api_base`` would
    leak the same way as the direct call sites.
    """
    from app.services.image_gen_router_service import ImageGenRouterService

    deployment = ImageGenRouterService._config_to_deployment(
        {
            "model_name": "openai/gpt-image-1",
            "provider": "OPENROUTER",
            "api_key": "sk-or-test",
            "api_base": "",
        }
    )
    assert deployment is not None
    assert deployment["litellm_params"]["api_base"] == "https://openrouter.ai/api/v1"
    assert deployment["litellm_params"]["model"] == "openrouter/openai/gpt-image-1"
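For contrast with the buggy ``if cfg.get("api_base"):`` shape quoted in the docstring, a sketch of the post-fix call-site pattern. The wrapper function is illustrative; only ``resolve_api_base`` and its keyword signature are taken from this diff:

from app.services.provider_api_base import resolve_api_base


def build_image_gen_kwargs(cfg: dict) -> dict:
    """Illustrative wrapper: forward a usable api_base, never an empty string."""
    kwargs = {
        "model": f"openrouter/{cfg['model_name']}",
        "api_key": cfg["api_key"],
    }
    api_base = resolve_api_base(
        provider=cfg.get("provider"),
        provider_prefix="openrouter",
        config_api_base=cfg.get("api_base"),
    )
    if api_base:  # a resolved default or an operator value, not the raw ""
        kwargs["api_base"] = api_base
    return kwargs

The empty string is handed to the resolver instead of being truth-tested away, so the cascade can substitute OpenRouter's public URL before LiteLLM ever consults its module-global fallback.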
@@ -265,6 +265,10 @@ def test_generate_image_gen_configs_filters_by_image_output():
        assert c["billing_tier"] in {"free", "premium"}
        assert c["provider"] == "OPENROUTER"
        assert c[_OPENROUTER_DYNAMIC_MARKER] is True
        # Defense-in-depth: emit the OpenRouter base URL at source so a
        # downstream call site that forgets ``resolve_api_base`` still
        # doesn't 404 against an inherited Azure endpoint.
        assert c["api_base"] == "https://openrouter.ai/api/v1"


def test_generate_image_gen_configs_assigns_image_id_offset():

@@ -342,6 +346,10 @@ def test_generate_vision_llm_configs_filters_by_image_input_text_output():
    assert cfg["input_cost_per_token"] == pytest.approx(5e-6)
    assert cfg["output_cost_per_token"] == pytest.approx(15e-6)
    assert cfg[_OPENROUTER_DYNAMIC_MARKER] is True
    # Defense-in-depth: emit the OpenRouter base URL at source so a
    # downstream call site that forgets ``resolve_api_base`` still
    # doesn't inherit an Azure endpoint.
    assert cfg["api_base"] == "https://openrouter.ai/api/v1"


def test_generate_vision_llm_configs_drops_chat_only_filters():
107 surfsense_backend/tests/unit/services/test_provider_api_base.py (new file)
@@ -0,0 +1,107 @@
"""Unit tests for the shared ``api_base`` resolver.

The cascade exists so vision and image-gen call sites can't silently
inherit ``litellm.api_base`` (commonly set by ``AZURE_OPENAI_ENDPOINT``)
when an OpenRouter / Groq / etc. config ships an empty string. See
``provider_api_base`` module docstring for the original repro
(OpenRouter image-gen 404-ing against an Azure endpoint).
"""

from __future__ import annotations

import pytest

from app.services.provider_api_base import (
    PROVIDER_DEFAULT_API_BASE,
    PROVIDER_KEY_DEFAULT_API_BASE,
    resolve_api_base,
)

pytestmark = pytest.mark.unit


def test_config_value_wins_over_defaults():
    """A non-empty config value is always returned verbatim, even when the
    provider has a default — the operator gets the last word."""
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base="https://my-openrouter-mirror.example.com/v1",
    )
    assert result == "https://my-openrouter-mirror.example.com/v1"


def test_provider_key_default_when_config_missing():
    """``DEEPSEEK`` shares the ``openai`` LiteLLM prefix but has its own
    base URL — the provider-key map must take precedence over the prefix
    map so DeepSeek requests don't go to OpenAI."""
    result = resolve_api_base(
        provider="DEEPSEEK",
        provider_prefix="openai",
        config_api_base=None,
    )
    assert result == PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]


def test_provider_prefix_default_when_no_key_default():
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base=None,
    )
    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]


def test_unknown_provider_returns_none():
    """When neither map matches we return ``None`` so the caller can let
    LiteLLM apply its own provider-integration default (Azure deployment
    URL, custom-provider URL, etc.)."""
    result = resolve_api_base(
        provider="SOMETHING_NEW",
        provider_prefix="something_new",
        config_api_base=None,
    )
    assert result is None


def test_empty_string_config_treated_as_missing():
    """The original bug: OpenRouter dynamic configs ship ``api_base=""``
    and downstream call sites use ``if cfg.get("api_base"):`` — empty
    strings are falsy in Python but the cascade has to step in anyway."""
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base="",
    )
    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]


def test_whitespace_only_config_treated_as_missing():
    """A config value of ``" "`` is a configuration mistake — treat it
    as missing instead of forwarding whitespace to LiteLLM (which would
    almost certainly 404)."""
    result = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base=" ",
    )
    assert result == PROVIDER_DEFAULT_API_BASE["openrouter"]


def test_provider_case_insensitive():
    """Some call sites pass the provider lowercase (DB enum value), others
    uppercase (YAML key). Both must resolve."""
    upper = resolve_api_base(
        provider="DEEPSEEK", provider_prefix="openai", config_api_base=None
    )
    lower = resolve_api_base(
        provider="deepseek", provider_prefix="openai", config_api_base=None
    )
    assert upper == lower == PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]


def test_all_inputs_none_returns_none():
    assert (
        resolve_api_base(provider=None, provider_prefix=None, config_api_base=None)
        is None
    )
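A condensed sketch of the cascade these tests encode. The map entries shown are illustrative; the real maps and implementation live in ``app.services.provider_api_base``:

# Illustrative map entries, not the full tables from the service module.
PROVIDER_KEY_DEFAULT_API_BASE = {"DEEPSEEK": "https://api.deepseek.com"}
PROVIDER_DEFAULT_API_BASE = {"openrouter": "https://openrouter.ai/api/v1"}


def resolve_api_base_sketch(*, provider, provider_prefix, config_api_base):
    # 1. A non-blank config value wins verbatim (operator's last word).
    if config_api_base and config_api_base.strip():
        return config_api_base
    # 2. Provider-key default beats the LiteLLM-prefix default, so
    #    DEEPSEEK (prefix "openai") doesn't resolve to OpenAI's URL.
    if provider and provider.upper() in PROVIDER_KEY_DEFAULT_API_BASE:
        return PROVIDER_KEY_DEFAULT_API_BASE[provider.upper()]
    # 3. Prefix default, e.g. "openrouter".
    if provider_prefix and provider_prefix.lower() in PROVIDER_DEFAULT_API_BASE:
        return PROVIDER_DEFAULT_API_BASE[provider_prefix.lower()]
    # 4. Nothing matched: return None so LiteLLM applies its own default.
    return None

Note how step 1's ``strip()`` check is what makes both the empty-string and whitespace-only tests pass: falsy-or-blank values fall through to the defaults instead of being forwarded.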
@@ -0,0 +1,244 @@
"""Unit tests for the shared chat-image capability resolver.

Two resolvers, two intents:

- ``derive_supports_image_input`` — best-effort True for the catalog and
  selector. Default-allow on unknown / unmapped models. The streaming
  task safety net never sees this value directly.

- ``is_known_text_only_chat_model`` — strict opt-out for the safety net.
  Returns True only when LiteLLM's model map *explicitly* sets
  ``supports_vision=False``. Anything else (missing key, exception,
  True) returns False so the request flows through to the provider.
"""

from __future__ import annotations

import pytest

from app.services.provider_capabilities import (
    derive_supports_image_input,
    is_known_text_only_chat_model,
)

pytestmark = pytest.mark.unit


# ---------------------------------------------------------------------------
# derive_supports_image_input — OpenRouter modalities path (authoritative)
# ---------------------------------------------------------------------------


def test_or_modalities_with_image_returns_true():
    assert (
        derive_supports_image_input(
            provider="OPENROUTER",
            model_name="openai/gpt-4o",
            openrouter_input_modalities=["text", "image"],
        )
        is True
    )


def test_or_modalities_text_only_returns_false():
    assert (
        derive_supports_image_input(
            provider="OPENROUTER",
            model_name="deepseek/deepseek-v3.2-exp",
            openrouter_input_modalities=["text"],
        )
        is False
    )


def test_or_modalities_empty_list_returns_false():
    """OR explicitly publishing an empty modality list is a definitive
    'no inputs at all' signal — treat as False rather than falling back
    to LiteLLM."""
    assert (
        derive_supports_image_input(
            provider="OPENROUTER",
            model_name="weird/empty-modalities",
            openrouter_input_modalities=[],
        )
        is False
    )


def test_or_modalities_none_falls_through_to_litellm():
    """``None`` (missing key) is *not* a definitive signal — fall through
    to LiteLLM. Using ``openai/gpt-4o`` which is in LiteLLM's map."""
    assert (
        derive_supports_image_input(
            provider="OPENAI",
            model_name="gpt-4o",
            openrouter_input_modalities=None,
        )
        is True
    )


# ---------------------------------------------------------------------------
# derive_supports_image_input — LiteLLM model-map path
# ---------------------------------------------------------------------------


def test_litellm_known_vision_model_returns_true():
    assert (
        derive_supports_image_input(
            provider="OPENAI",
            model_name="gpt-4o",
        )
        is True
    )


def test_litellm_base_model_wins_over_model_name():
    """Azure-style entries pass model_name=deployment_id and put the
    canonical sku in litellm_params.base_model. The resolver must
    consult base_model first or the deployment id (which LiteLLM
    doesn't know) would shadow the real capability."""
    assert (
        derive_supports_image_input(
            provider="AZURE_OPENAI",
            model_name="my-azure-deployment-id",
            base_model="gpt-4o",
        )
        is True
    )


def test_litellm_unknown_model_default_allows():
    """Default-allow on unknown — the safety net is the actual block."""
    assert (
        derive_supports_image_input(
            provider="CUSTOM",
            model_name="brand-new-model-x9-unmapped",
            custom_provider="brand_new_proxy",
        )
        is True
    )


def test_litellm_known_text_only_returns_false():
    """A model that LiteLLM explicitly knows is text-only resolves to
    False even via the catalog resolver. ``deepseek-chat`` (the
    DeepSeek-V3 chat sku) is in the map without supports_vision and
    LiteLLM's `supports_vision` returns False."""
    # Sanity: confirm the helper's negative path. We use a small model
    # known not to support vision per the map.
    result = derive_supports_image_input(
        provider="DEEPSEEK",
        model_name="deepseek-chat",
    )
    # We accept either False (LiteLLM said explicit no) or True
    # (default-allow if the entry isn't mapped on this version) — the
    # invariant is that the resolver never *raises* on a known-text-only
    # provider/model. The behaviour-binding assertion lives in
    # ``test_is_known_text_only_returns_true_on_explicit_false`` below.
    assert isinstance(result, bool)


# ---------------------------------------------------------------------------
# is_known_text_only_chat_model — strict opt-out semantics
# ---------------------------------------------------------------------------


def test_is_known_text_only_returns_false_for_vision_model():
    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="gpt-4o",
        )
        is False
    )


def test_is_known_text_only_returns_false_for_unknown_model():
    """Strict opt-out: missing from the map ≠ text-only. The safety net
    must NOT fire for an unmapped model — that's the regression we're
    fixing."""
    assert (
        is_known_text_only_chat_model(
            provider="CUSTOM",
            model_name="brand-new-model-x9-unmapped",
            custom_provider="brand_new_proxy",
        )
        is False
    )


def test_is_known_text_only_returns_false_when_lookup_raises(monkeypatch):
    """LiteLLM's ``get_model_info`` raises freely on parse errors. The
    helper swallows the exception and returns False so the safety net
    doesn't fire on a transient lookup failure."""
    import app.services.provider_capabilities as pc

    def _raise(**_kwargs):
        raise ValueError("intentional test failure")

    monkeypatch.setattr(pc.litellm, "get_model_info", _raise)

    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="gpt-4o",
        )
        is False
    )


def test_is_known_text_only_returns_true_on_explicit_false(monkeypatch):
    """Stub LiteLLM's ``get_model_info`` to return an explicit False so
    we exercise the opt-out path deterministically. Using a stub keeps
    the test stable across LiteLLM map updates."""
    import app.services.provider_capabilities as pc

    def _info(**_kwargs):
        return {"supports_vision": False, "max_input_tokens": 8192}

    monkeypatch.setattr(pc.litellm, "get_model_info", _info)

    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="any-model",
        )
        is True
    )


def test_is_known_text_only_returns_false_on_supports_vision_true(monkeypatch):
    import app.services.provider_capabilities as pc

    def _info(**_kwargs):
        return {"supports_vision": True}

    monkeypatch.setattr(pc.litellm, "get_model_info", _info)

    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="any-model",
        )
        is False
    )


def test_is_known_text_only_returns_false_on_missing_key(monkeypatch):
    """A model entry without ``supports_vision`` at all is treated as
    'unknown' — strict opt-out means False."""
    import app.services.provider_capabilities as pc

    def _info(**_kwargs):
        return {"max_input_tokens": 8192}  # no supports_vision

    monkeypatch.setattr(pc.litellm, "get_model_info", _info)

    assert (
        is_known_text_only_chat_model(
            provider="OPENAI",
            model_name="any-model",
        )
        is False
    )
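A sketch of the strict opt-out rule, assuming the helper is backed by LiteLLM's ``get_model_info`` lookup as the tests' monkeypatching suggests; the body is illustrative, not the actual helper:

import litellm


def is_known_text_only_sketch(*, provider, model_name, custom_provider=None) -> bool:
    try:
        info = litellm.get_model_info(
            model=model_name, custom_llm_provider=custom_provider
        )
    except Exception:
        return False  # lookup failure is never a block
    # Only an explicit False blocks; a missing key means "unknown".
    return info.get("supports_vision") is False

The asymmetry with the catalog resolver is the whole design: the catalog may guess optimistically, but the safety net only fires on a positive, explicit text-only signal from the model map.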
@@ -0,0 +1,281 @@
"""Unit tests for the chat-catalog ``supports_image_input`` capability flag.

Capability is sourced from two places, in order of preference:

1. ``architecture.input_modalities`` for dynamic OpenRouter chat configs
   (authoritative — OpenRouter publishes per-model modalities directly).
2. LiteLLM's authoritative model map (``litellm.supports_vision``) for
   YAML / BYOK configs that don't carry an explicit operator override.

The catalog default is *True* (conservative-allow): an unknown / unmapped
model is not pre-judged. The streaming-task safety net
(``is_known_text_only_chat_model``) is the only place a False actually
blocks a request — and it requires LiteLLM to *explicitly* mark the model
as text-only.
"""

from __future__ import annotations

import pytest

from app.services.openrouter_integration_service import (
    _OPENROUTER_DYNAMIC_MARKER,
    _generate_configs,
    _supports_image_input,
)

pytestmark = pytest.mark.unit


_SETTINGS_BASE: dict = {
    "api_key": "sk-or-test",
    "id_offset": -10_000,
    "rpm": 200,
    "tpm": 1_000_000,
    "free_rpm": 20,
    "free_tpm": 100_000,
    "anonymous_enabled_paid": False,
    "anonymous_enabled_free": True,
    "quota_reserve_tokens": 4000,
}


# ---------------------------------------------------------------------------
# _supports_image_input helper (OpenRouter modalities)
# ---------------------------------------------------------------------------


def test_supports_image_input_true_for_multimodal():
    assert (
        _supports_image_input(
            {
                "id": "openai/gpt-4o",
                "architecture": {
                    "input_modalities": ["text", "image"],
                    "output_modalities": ["text"],
                },
            }
        )
        is True
    )


def test_supports_image_input_false_for_text_only():
    """The exact failure mode the safety net guards against — DeepSeek V3
    is a text-in/text-out model and would 404 if forwarded image_url."""
    assert (
        _supports_image_input(
            {
                "id": "deepseek/deepseek-v3.2-exp",
                "architecture": {
                    "input_modalities": ["text"],
                    "output_modalities": ["text"],
                },
            }
        )
        is False
    )


def test_supports_image_input_false_when_modalities_missing():
    """Defensive: missing architecture is treated as text-only at the
    OpenRouter helper level. The wider catalog resolver
    (`derive_supports_image_input`) only consults modalities when they
    are non-empty, otherwise it falls back to LiteLLM."""
    assert _supports_image_input({"id": "weird/model"}) is False
    assert _supports_image_input({"id": "weird/model", "architecture": {}}) is False
    assert (
        _supports_image_input(
            {"id": "weird/model", "architecture": {"input_modalities": None}}
        )
        is False
    )


# ---------------------------------------------------------------------------
# _generate_configs threads the flag onto every emitted chat config
# ---------------------------------------------------------------------------


def test_generate_configs_emits_supports_image_input():
    raw = [
        {
            "id": "openai/gpt-4o",
            "architecture": {
                "input_modalities": ["text", "image"],
                "output_modalities": ["text"],
            },
            "supported_parameters": ["tools"],
            "context_length": 200_000,
            "pricing": {"prompt": "0.000005", "completion": "0.000015"},
        },
        {
            "id": "deepseek/deepseek-v3.2-exp",
            "architecture": {
                "input_modalities": ["text"],
                "output_modalities": ["text"],
            },
            "supported_parameters": ["tools"],
            "context_length": 200_000,
            "pricing": {"prompt": "0.000003", "completion": "0.000015"},
        },
    ]
    cfgs = _generate_configs(raw, dict(_SETTINGS_BASE))
    by_model = {c["model_name"]: c for c in cfgs}

    gpt = by_model["openai/gpt-4o"]
    assert gpt["supports_image_input"] is True
    assert gpt[_OPENROUTER_DYNAMIC_MARKER] is True

    deepseek = by_model["deepseek/deepseek-v3.2-exp"]
    assert deepseek["supports_image_input"] is False
    assert deepseek[_OPENROUTER_DYNAMIC_MARKER] is True


# ---------------------------------------------------------------------------
# YAML loader: defer to derive_supports_image_input on unannotated entries
# ---------------------------------------------------------------------------


def test_yaml_loader_resolves_unannotated_vision_model_to_true(tmp_path, monkeypatch):
    """The regression case: an Azure GPT-5.x YAML entry without a
    ``supports_image_input`` override should resolve to True via LiteLLM's
    model map (which says ``supports_vision: true``). Previously this
    defaulted to False, blocking every image turn for vision-capable
    YAML configs."""
    yaml_dir = tmp_path / "app" / "config"
    yaml_dir.mkdir(parents=True)
    (yaml_dir / "global_llm_config.yaml").write_text(
        """
global_llm_configs:
  - id: -2
    name: Azure GPT-4o
    provider: AZURE_OPENAI
    model_name: gpt-4o
    api_key: sk-test
""",
        encoding="utf-8",
    )

    from app import config as config_module

    monkeypatch.setattr(config_module, "BASE_DIR", tmp_path)

    configs = config_module.load_global_llm_configs()
    assert len(configs) == 1
    assert configs[0]["supports_image_input"] is True


def test_yaml_loader_respects_explicit_supports_image_input(tmp_path, monkeypatch):
    yaml_dir = tmp_path / "app" / "config"
    yaml_dir.mkdir(parents=True)
    (yaml_dir / "global_llm_config.yaml").write_text(
        """
global_llm_configs:
  - id: -1
    name: GPT-4o
    provider: OPENAI
    model_name: gpt-4o
    api_key: sk-test
    supports_image_input: false
""",
        encoding="utf-8",
    )

    from app import config as config_module

    monkeypatch.setattr(config_module, "BASE_DIR", tmp_path)

    configs = config_module.load_global_llm_configs()
    assert len(configs) == 1
    # Operator override always wins, even against LiteLLM's True.
    assert configs[0]["supports_image_input"] is False


def test_yaml_loader_unknown_model_default_allows(tmp_path, monkeypatch):
    """Unknown / unmapped model in YAML: default-allow. The streaming
    safety net (which requires an explicit-False from LiteLLM) is the
    only place a real block happens, so we don't lock the user out of
    a freshly added third-party entry the catalog can't introspect."""
    yaml_dir = tmp_path / "app" / "config"
    yaml_dir.mkdir(parents=True)
    (yaml_dir / "global_llm_config.yaml").write_text(
        """
global_llm_configs:
  - id: -1
    name: Some Brand New Model
    provider: CUSTOM
    custom_provider: brand_new_proxy
    model_name: brand-new-model-x9
    api_key: sk-test
""",
        encoding="utf-8",
    )

    from app import config as config_module

    monkeypatch.setattr(config_module, "BASE_DIR", tmp_path)

    configs = config_module.load_global_llm_configs()
    assert len(configs) == 1
    assert configs[0]["supports_image_input"] is True


# ---------------------------------------------------------------------------
# AgentConfig threads the flag through both YAML and Auto / BYOK
# ---------------------------------------------------------------------------


def test_agent_config_from_yaml_explicit_overrides_resolver():
    from app.agents.new_chat.llm_config import AgentConfig

    cfg_text_only = AgentConfig.from_yaml_config(
        {
            "id": -1,
            "name": "Text Only Override",
            "provider": "openai",
            "model_name": "gpt-4o",  # Capable per LiteLLM, but operator says no.
            "api_key": "sk-test",
            "supports_image_input": False,
        }
    )
    cfg_explicit_vision = AgentConfig.from_yaml_config(
        {
            "id": -2,
            "name": "GPT-4o",
            "provider": "openai",
            "model_name": "gpt-4o",
            "api_key": "sk-test",
            "supports_image_input": True,
        }
    )
    assert cfg_text_only.supports_image_input is False
    assert cfg_explicit_vision.supports_image_input is True


def test_agent_config_from_yaml_unannotated_uses_resolver():
    """Without an explicit YAML key, AgentConfig defers to the catalog
    resolver — for ``gpt-4o`` LiteLLM's map says supports_vision=True."""
    from app.agents.new_chat.llm_config import AgentConfig

    cfg = AgentConfig.from_yaml_config(
        {
            "id": -1,
            "name": "GPT-4o (no override)",
            "provider": "openai",
            "model_name": "gpt-4o",
            "api_key": "sk-test",
        }
    )
    assert cfg.supports_image_input is True


def test_agent_config_auto_mode_supports_image_input():
    """Auto routes across the pool. We optimistically allow image input
    so users can keep their selection on Auto with a vision-capable
    deployment somewhere in the pool. The router's own `allowed_fails`
    handles non-vision deployments via fallback."""
    from app.agents.new_chat.llm_config import AgentConfig

    auto = AgentConfig.from_auto_mode()
    assert auto.supports_image_input is True
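The preference order from the module docstring, as an illustrative sketch; the body is not the real resolver, the unknown-model branch matches ``test_litellm_unknown_model_default_allows`` above, and ``get_model_info`` raising on unmapped models is the assumed signal:

import litellm


def derive_supports_image_input_sketch(*, provider, model_name,
                                       openrouter_input_modalities=None,
                                       base_model=None, custom_provider=None) -> bool:
    # provider is accepted for parity with the real helper (unused in the sketch).
    # 1. OpenRouter-published modalities are authoritative when present;
    #    an explicit empty list means "no inputs at all", hence False.
    if openrouter_input_modalities is not None:
        return "image" in openrouter_input_modalities
    # 2. LiteLLM's model map, preferring the canonical base_model over
    #    e.g. an Azure deployment id. An unmapped model raises here.
    try:
        info = litellm.get_model_info(
            model=base_model or model_name, custom_llm_provider=custom_provider
        )
    except Exception:
        return True  # unknown / unmapped: default-allow
    if info.get("supports_vision") is False:
        return False  # the map explicitly says text-only
    return True  # explicit True or missing key: allow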
@@ -0,0 +1,89 @@
"""Defense-in-depth: vision-LLM resolution must not leak ``api_base``
defaults from ``litellm.api_base`` either.

Vision shares the same shape as image-gen — global YAML / OpenRouter
dynamic configs ship ``api_base=""`` and the pre-fix ``get_vision_llm``
call sites would silently drop the empty string and inherit
``AZURE_OPENAI_ENDPOINT``. ``ChatLiteLLM(...)`` doesn't 404 on
construction so we test the kwargs we hand to it instead.
"""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

pytestmark = pytest.mark.unit


@pytest.mark.asyncio
async def test_get_vision_llm_global_openrouter_sets_api_base():
    """Global negative-ID branch: an OpenRouter vision config with
    ``api_base=""`` must end up calling ``SanitizedChatLiteLLM`` with
    ``api_base="https://openrouter.ai/api/v1"`` — never an empty string,
    never silently absent."""
    from app.services import llm_service

    cfg = {
        "id": -30_001,
        "name": "GPT-4o Vision (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-4o",
        "api_key": "sk-or-test",
        "api_base": "",
        "api_version": None,
        "litellm_params": {},
        "billing_tier": "free",
    }

    search_space = MagicMock()
    search_space.id = 1
    search_space.user_id = "user-x"
    search_space.vision_llm_config_id = cfg["id"]

    session = AsyncMock()
    scalars = MagicMock()
    scalars.first.return_value = search_space
    result = MagicMock()
    result.scalars.return_value = scalars
    session.execute.return_value = result

    captured: dict = {}

    class FakeSanitized:
        def __init__(self, **kwargs):
            captured.update(kwargs)

    with (
        patch(
            "app.services.vision_llm_router_service.get_global_vision_llm_config",
            return_value=cfg,
        ),
        patch(
            "app.agents.new_chat.llm_config.SanitizedChatLiteLLM",
            new=FakeSanitized,
        ),
    ):
        await llm_service.get_vision_llm(session=session, search_space_id=1)

    assert captured.get("api_base") == "https://openrouter.ai/api/v1"
    assert captured["model"] == "openrouter/openai/gpt-4o"


def test_vision_router_deployment_sets_api_base_when_config_empty():
    """Auto-mode vision router: deployments are fed to ``litellm.Router``,
    so the resolver has to apply at deployment construction time too."""
    from app.services.vision_llm_router_service import VisionLLMRouterService

    deployment = VisionLLMRouterService._config_to_deployment(
        {
            "model_name": "openai/gpt-4o",
            "provider": "OPENROUTER",
            "api_key": "sk-or-test",
            "api_base": "",
        }
    )
    assert deployment is not None
    assert deployment["litellm_params"]["api_base"] == "https://openrouter.ai/api/v1"
    assert deployment["litellm_params"]["model"] == "openrouter/openai/gpt-4o"