feat(database-migrations): add migration to remove legacy model config tables and remove stale model connection code

This commit is contained in:
Anish Sarkar 2026-06-13 12:45:43 +05:30
parent 50668775f8
commit bd4a04f2e7
93 changed files with 956 additions and 11442 deletions

View file

@ -1,27 +1,4 @@
"""Unit tests for ``_resolve_agent_billing_for_search_space``.
Validates the resolver used by Celery podcast/video tasks to compute
``(owner_user_id, billing_tier, base_model)`` from a search space and its
agent LLM config. The resolver mirrors chat's billing-resolution pattern at
``stream_new_chat.py:2294-2351`` and is the single integration point that
prevents Auto-mode podcast/video from leaking premium credit.
Coverage:
* Auto mode + ``thread_id`` set, pin resolves to a negative-id premium
global → returns ``("premium", <base_model>)``.
* Auto mode + ``thread_id`` set, pin resolves to a negative-id free
global → returns ``("free", <base_model>)``.
* Auto mode + ``thread_id`` set, pin resolves to a positive-id BYOK config
→ always ``"free"``.
* Auto mode + ``thread_id=None`` → fallback to ``("free", "auto")`` without
hitting the pin service.
* Negative id (no Auto) → uses ``get_global_llm_config``'s
``billing_tier``.
* Positive id (user BYOK) → always ``"free"``.
* Search space not found → raises ``ValueError``.
* ``agent_llm_id`` is None → raises ``ValueError``.
"""
"""Unit tests for ``_resolve_agent_billing_for_search_space``."""
from __future__ import annotations
@ -34,11 +11,6 @@ import pytest
pytestmark = pytest.mark.unit
# ---------------------------------------------------------------------------
# Fakes
# ---------------------------------------------------------------------------
class _FakeExecResult:
def __init__(self, obj):
self._obj = obj
@ -51,14 +23,6 @@ class _FakeExecResult:
class _FakeSession:
"""Tiny AsyncSession stub.
``responses`` is a list of objects to return from successive
``execute()`` calls (in order). The resolver makes at most two
``execute()`` calls (search-space lookup, then optionally NewLLMConfig
lookup), so two queued responses cover the matrix.
"""
def __init__(self, responses: list):
self._responses = list(responses)
@ -67,9 +31,6 @@ class _FakeSession:
return _FakeExecResult(None)
return _FakeExecResult(self._responses.pop(0))
async def commit(self) -> None:
pass
@dataclass
class _FakePinResolution:
@ -78,53 +39,33 @@ class _FakePinResolution:
from_existing_pin: bool = False
def _make_search_space(*, agent_llm_id: int | None, user_id: UUID) -> SimpleNamespace:
return SimpleNamespace(
id=42,
agent_llm_id=agent_llm_id,
user_id=user_id,
)
def _make_search_space(*, chat_model_id: int | None, user_id: UUID) -> SimpleNamespace:
return SimpleNamespace(id=42, chat_model_id=chat_model_id, user_id=user_id)
def _make_byok_config(
*, id_: int, base_model: str | None = None, model_name: str = "gpt-byok"
def _make_byok_model(
*, id_: int, base_model: str | None = None, model_id: str = "gpt-byok"
) -> SimpleNamespace:
return SimpleNamespace(
id=id_,
model_name=model_name,
litellm_params={"base_model": base_model} if base_model else {},
model_id=model_id,
catalog={"base_model": base_model} if base_model else {},
connection=SimpleNamespace(enabled=True, search_space_id=42, user_id=None),
)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
"""Auto + thread → pin service resolves to negative-id premium config →
resolver returns ``("premium", <base_model>)``."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])
# Mock the pin service to return a concrete premium config id.
async def _fake_resolve_pin(
sess,
*,
thread_id,
search_space_id,
user_id,
selected_llm_config_id,
force_repin_free=False,
):
assert selected_llm_config_id == 0
assert thread_id == 99
async def _fake_resolve_pin(*_args, **kwargs):
assert kwargs["selected_llm_config_id"] == 0
assert kwargs["thread_id"] == 99
return _FakePinResolution(resolved_llm_config_id=-1, resolved_tier="premium")
# Mock global config lookup to return a premium entry.
def _fake_get_global(cfg_id):
if cfg_id == -1:
return {
@ -135,8 +76,6 @@ async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
}
return None
# Lazy imports inside the resolver — patch the *target* modules so the
# imported names resolve to our fakes.
import app.services.auto_model_pin_service as pin_module
import app.services.llm_service as llm_module
@ -154,77 +93,18 @@ async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
assert base_model == "gpt-5.4"
@pytest.mark.asyncio
async def test_auto_mode_with_thread_id_resolves_to_free_global(monkeypatch):
"""Auto + thread → pin returns negative-id free config → resolver
returns ``("free", <base_model>)``. Same path the pin service takes for
out-of-credit users (graceful degradation)."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
async def _fake_resolve_pin(
sess,
*,
thread_id,
search_space_id,
user_id,
selected_llm_config_id,
force_repin_free=False,
):
return _FakePinResolution(resolved_llm_config_id=-3, resolved_tier="free")
def _fake_get_global(cfg_id):
if cfg_id == -3:
return {
"id": -3,
"model_name": "openrouter/free-model",
"billing_tier": "free",
"litellm_params": {"base_model": "openrouter/free-model"},
}
return None
import app.services.auto_model_pin_service as pin_module
import app.services.llm_service as llm_module
monkeypatch.setattr(
pin_module, "resolve_or_get_pinned_llm_config_id", _fake_resolve_pin
)
monkeypatch.setattr(llm_module, "get_global_llm_config", _fake_get_global)
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42, thread_id=99
)
assert owner == user_id
assert tier == "free"
assert base_model == "openrouter/free-model"
@pytest.mark.asyncio
async def test_auto_mode_with_thread_id_resolves_to_byok_is_free(monkeypatch):
"""Auto + thread → pin returns positive-id BYOK config → resolver
returns ``("free", ...)`` (BYOK is always free per
``AgentConfig.from_new_llm_config``)."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
search_space = _make_search_space(agent_llm_id=0, user_id=user_id)
byok_cfg = _make_byok_config(
id_=17, base_model="anthropic/claude-3-haiku", model_name="my-claude"
search_space = _make_search_space(chat_model_id=0, user_id=user_id)
byok_model = _make_byok_model(
id_=17, base_model="anthropic/claude-3-haiku", model_id="my-claude"
)
session = _FakeSession([search_space, byok_cfg])
session = _FakeSession([search_space, byok_model])
async def _fake_resolve_pin(
sess,
*,
thread_id,
search_space_id,
user_id,
selected_llm_config_id,
force_repin_free=False,
):
async def _fake_resolve_pin(*_args, **_kwargs):
return _FakePinResolution(resolved_llm_config_id=17, resolved_tier="free")
import app.services.auto_model_pin_service as pin_module
@ -244,13 +124,10 @@ async def test_auto_mode_with_thread_id_resolves_to_byok_is_free(monkeypatch):
@pytest.mark.asyncio
async def test_auto_mode_without_thread_id_falls_back_to_free():
"""Auto + ``thread_id=None`` → ``("free", "auto")`` without invoking
the pin service. Forward-compat fallback for any future direct-API
entrypoint that doesn't have a chat thread."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42, thread_id=None
@ -263,13 +140,10 @@ async def test_auto_mode_without_thread_id_falls_back_to_free():
@pytest.mark.asyncio
async def test_auto_mode_pin_failure_falls_back_to_free(monkeypatch):
"""If the pin service raises ``ValueError`` (thread missing /
mismatched search space), the resolver should log and return free
rather than killing the whole task."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])
async def _fake_resolve_pin(*args, **kwargs):
raise ValueError("thread missing")
@ -291,12 +165,10 @@ async def test_auto_mode_pin_failure_falls_back_to_free(monkeypatch):
@pytest.mark.asyncio
async def test_negative_id_premium_global_returns_premium(monkeypatch):
"""Explicit negative agent_llm_id → ``get_global_llm_config`` →
return its ``billing_tier``."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=-1, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=-1, user_id=user_id)])
def _fake_get_global(cfg_id):
return {
@ -319,50 +191,15 @@ async def test_negative_id_premium_global_returns_premium(monkeypatch):
assert base_model == "gpt-5.4"
@pytest.mark.asyncio
async def test_negative_id_free_global_returns_free(monkeypatch):
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=-2, user_id=user_id)])
def _fake_get_global(cfg_id):
return {
"id": cfg_id,
"model_name": "openrouter/some-free",
"billing_tier": "free",
"litellm_params": {"base_model": "openrouter/some-free"},
}
import app.services.llm_service as llm_module
monkeypatch.setattr(llm_module, "get_global_llm_config", _fake_get_global)
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42, thread_id=None
)
assert owner == user_id
assert tier == "free"
assert base_model == "openrouter/some-free"
@pytest.mark.asyncio
async def test_negative_id_missing_base_model_falls_back_to_model_name(monkeypatch):
"""When the global config has no ``litellm_params.base_model``, the
resolver falls back to ``model_name``, matching chat's behavior."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=-5, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=-5, user_id=user_id)])
def _fake_get_global(cfg_id):
return {
"id": cfg_id,
"model_name": "fallback-model",
"billing_tier": "premium",
# No litellm_params.
}
return {"id": cfg_id, "model_name": "fallback-model", "billing_tier": "premium"}
import app.services.llm_service as llm_module
@ -378,14 +215,12 @@ async def test_negative_id_missing_base_model_falls_back_to_model_name(monkeypat
@pytest.mark.asyncio
async def test_positive_id_byok_is_always_free():
"""Positive agent_llm_id → user-owned BYOK NewLLMConfig → always free,
regardless of underlying provider tier."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
search_space = _make_search_space(agent_llm_id=23, user_id=user_id)
byok_cfg = _make_byok_config(id_=23, base_model="anthropic/claude-3.5-sonnet")
session = _FakeSession([search_space, byok_cfg])
search_space = _make_search_space(chat_model_id=23, user_id=user_id)
byok_model = _make_byok_model(id_=23, base_model="anthropic/claude-3.5-sonnet")
session = _FakeSession([search_space, byok_model])
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42
@ -398,13 +233,10 @@ async def test_positive_id_byok_is_always_free():
@pytest.mark.asyncio
async def test_positive_id_byok_missing_returns_free_with_empty_base_model():
"""If the BYOK config row is missing/deleted but the search space still
points at it, the resolver still returns free (no debit) with an empty
base_model — billable_call's premium path is skipped, no harm done."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=99, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=99, user_id=user_id)])
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42
@ -419,18 +251,18 @@ async def test_positive_id_byok_missing_returns_free_with_empty_base_model():
async def test_search_space_not_found_raises_value_error():
from app.services.billable_calls import _resolve_agent_billing_for_search_space
session = _FakeSession([None])
with pytest.raises(ValueError, match="Search space"):
await _resolve_agent_billing_for_search_space(session, search_space_id=999)
await _resolve_agent_billing_for_search_space(
_FakeSession([None]), search_space_id=999
)
@pytest.mark.asyncio
async def test_agent_llm_id_none_raises_value_error():
async def test_chat_model_id_none_raises_value_error():
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=None, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=None, user_id=user_id)])
with pytest.raises(ValueError, match="agent_llm_id"):
with pytest.raises(ValueError, match="chat_model_id"):
await _resolve_agent_billing_for_search_space(session, search_space_id=42)

View file

@ -32,8 +32,9 @@ class _FakeQuotaResult:
class _FakeExecResult:
def __init__(self, thread):
def __init__(self, *, thread=None, scalars=None):
self._thread = thread
self._scalars = scalars or []
def unique(self):
return self
@ -41,19 +42,69 @@ class _FakeExecResult:
def scalar_one_or_none(self):
return self._thread
def scalars(self):
return SimpleNamespace(all=lambda: self._scalars)
class _FakeSession:
def __init__(self, thread):
def __init__(self, thread, *, models=None):
self.thread = thread
self.models = models or []
self.commit_count = 0
self.execute_count = 0
async def execute(self, _stmt):
return _FakeExecResult(self.thread)
self.execute_count += 1
if self.execute_count == 1:
return _FakeExecResult(thread=self.thread)
return _FakeExecResult(scalars=self.models)
async def commit(self):
self.commit_count += 1
def _set_global_llm_configs(monkeypatch, config, configs: list[dict]) -> None:
    """Patch the new global model catalog shape from compact legacy cfg fixtures.

    Every legacy config dict is expanded into one connection entry and one
    model entry. The original list and both derived lists are then installed
    on ``config`` through ``monkeypatch.setattr`` as ``GLOBAL_LLM_CONFIGS``,
    ``GLOBAL_CONNECTIONS`` and ``GLOBAL_MODELS``.

    Args:
        monkeypatch: Object exposing ``setattr(target, name, value)``.
        config: Object that receives the three patched attributes.
        configs: Legacy fixture dicts. ``id`` and ``model_name`` are required;
            every other key read here is optional and has a default.

    Raises:
        KeyError: If a fixture lacks ``id`` or ``model_name``.
    """
    connections = []
    models = []
    for cfg in configs:
        config_id = int(cfg["id"])
        # The synthetic connection id is the model id shifted by -100_000.
        connection_id = config_id - 100_000
        # Prefer the new ``provider`` key, else the legacy ``litellm_provider``.
        provider = cfg.get("provider") or cfg.get("litellm_provider")
        model_name = cfg["model_name"]

        # An explicit score of 0 is a real value: only fall back to the static
        # score when ``quality_score`` is absent or None (a plain ``or`` would
        # silently discard a zero).
        quality_score = cfg.get("quality_score")
        if quality_score is None:
            quality_score = cfg.get("quality_score_static")

        connections.append(
            {
                "id": connection_id,
                "provider": provider,
                "scope": "GLOBAL",
                "enabled": True,
            }
        )
        models.append(
            {
                "id": config_id,
                "connection_id": connection_id,
                "model_id": model_name,
                "display_name": cfg.get("name") or model_name,
                "supports_chat": cfg.get("supports_chat", True),
                "supports_image_input": cfg.get("supports_image_input", True),
                "supports_tools": cfg.get("supports_tools", True),
                "supports_image_generation": cfg.get(
                    "supports_image_generation", False
                ),
                "capabilities_override": cfg.get("capabilities_override") or {},
                "billing_tier": cfg.get("billing_tier", "free"),
                "catalog": {
                    "auto_pin_tier": cfg.get("auto_pin_tier"),
                    "quality_score": quality_score,
                },
            }
        )

    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", configs)
    monkeypatch.setattr(config, "GLOBAL_CONNECTIONS", connections)
    monkeypatch.setattr(config, "GLOBAL_MODELS", models)
def _thread(
*,
search_space_id: int = 10,
@ -71,9 +122,9 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
{
@ -111,9 +162,9 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -2,
@ -158,9 +209,9 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -216,9 +267,9 @@ async def test_next_turn_reuses_existing_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -257,9 +308,9 @@ async def test_premium_eligible_auto_can_pin_premium(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -295,9 +346,9 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -2,
@ -340,9 +391,9 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -2,
@ -385,9 +436,9 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -2,
@ -433,9 +484,9 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-2))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
],
@ -458,9 +509,9 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-999))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{"id": -2, "litellm_provider": "openai", "model_name": "gpt-free", "api_key": "k1"},
],
@ -487,7 +538,7 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
# ---------------------------------------------------------------------------
# Quality-aware pin selection (Auto Fastest upgrade)
# Quality-aware pin selection (Auto upgrade)
# ---------------------------------------------------------------------------
@ -498,9 +549,9 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -550,9 +601,9 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -602,9 +653,9 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -676,9 +727,9 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
"quality_score": 10,
"health_gated": False,
}
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[*high_score_cfgs, low_score_trap],
)
@ -723,9 +774,9 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -775,9 +826,9 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -833,9 +884,9 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -886,9 +937,9 @@ async def test_clearing_runtime_cooldown_restores_pin_reuse(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
@ -931,9 +982,9 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,

View file

@ -15,15 +15,19 @@ async def test_global_openrouter_image_gen_sets_explicit_api_base():
"""The global-config branch forwards the explicit OpenRouter base."""
from app.routes import image_generation_routes
cfg = {
global_model = {
"id": -20_001,
"name": "GPT Image 1 (OpenRouter)",
"litellm_provider": "openrouter",
"model_name": "openai/gpt-image-1",
"connection_id": -101,
"model_id": "openai/gpt-image-1",
"supports_image_generation": True,
"capabilities_override": {},
}
global_connection = {
"id": -101,
"provider": "openrouter",
"api_key": "sk-or-test",
"api_base": "https://openrouter.ai/api/v1",
"api_version": None,
"litellm_params": {},
"base_url": "https://openrouter.ai/api/v1",
"extra": {},
}
captured: dict = {}
@ -33,7 +37,7 @@ async def test_global_openrouter_image_gen_sets_explicit_api_base():
return MagicMock(model_dump=lambda: {"data": []}, _hidden_params={})
image_gen = MagicMock()
image_gen.image_generation_config_id = cfg["id"]
image_gen.image_gen_model_id = global_model["id"]
image_gen.prompt = "test"
image_gen.n = 1
image_gen.quality = None
@ -43,14 +47,19 @@ async def test_global_openrouter_image_gen_sets_explicit_api_base():
image_gen.model = None
search_space = MagicMock()
search_space.image_generation_config_id = cfg["id"]
search_space.image_gen_model_id = global_model["id"]
session = MagicMock()
with (
patch.object(
image_generation_routes,
"_get_global_image_gen_config",
return_value=cfg,
"_get_global_model",
return_value=global_model,
),
patch.object(
image_generation_routes,
"_get_global_connection",
return_value=global_connection,
),
patch.object(
image_generation_routes,
@ -74,15 +83,19 @@ async def test_generate_image_tool_global_sets_explicit_api_base():
generate_image as gi_module,
)
cfg = {
global_model = {
"id": -20_001,
"name": "GPT Image 1 (OpenRouter)",
"litellm_provider": "openrouter",
"model_name": "openai/gpt-image-1",
"connection_id": -101,
"model_id": "openai/gpt-image-1",
"supports_image_generation": True,
"capabilities_override": {},
}
global_connection = {
"id": -101,
"provider": "openrouter",
"api_key": "sk-or-test",
"api_base": "https://openrouter.ai/api/v1",
"api_version": None,
"litellm_params": {},
"base_url": "https://openrouter.ai/api/v1",
"extra": {},
}
captured: dict = {}
@ -98,7 +111,7 @@ async def test_generate_image_tool_global_sets_explicit_api_base():
search_space = MagicMock()
search_space.id = 1
search_space.image_generation_config_id = cfg["id"]
search_space.image_gen_model_id = global_model["id"]
session_cm = AsyncMock()
session = AsyncMock()
@ -121,7 +134,8 @@ async def test_generate_image_tool_global_sets_explicit_api_base():
with (
patch.object(gi_module, "shielded_async_session", return_value=session_cm),
patch.object(gi_module, "_get_global_image_gen_config", return_value=cfg),
patch.object(gi_module, "_get_global_model", return_value=global_model),
patch.object(gi_module, "_get_global_connection", return_value=global_connection),
patch.object(
gi_module, "aimage_generation", side_effect=fake_aimage_generation
),

View file

@ -217,7 +217,7 @@ def test_generate_configs_drops_non_text_and_non_tool_models():
# ---------------------------------------------------------------------------
# _generate_image_gen_configs / _generate_vision_llm_configs
# _generate_image_gen_configs
# ---------------------------------------------------------------------------
@ -263,7 +263,7 @@ def test_generate_image_gen_configs_filters_by_image_output():
# Each config must carry ``billing_tier`` for routing in image_generation_routes.
for c in cfgs:
assert c["billing_tier"] in {"free", "premium"}
assert c["litellm_provider"] == "openrouter"
assert c["provider"] == "openrouter"
assert c[_OPENROUTER_DYNAMIC_MARKER] is True
# Emit the OpenRouter base URL at source so every call path passes an
# explicit api_base and cannot inherit a process-global endpoint.
@ -271,9 +271,7 @@ def test_generate_image_gen_configs_filters_by_image_output():
def test_generate_image_gen_configs_assigns_image_id_offset():
"""Image configs use a different id_offset (-20000) so their negative
IDs don't collide with chat configs (-10000) or vision configs (-30000).
"""
"""Image configs use their own id_offset (-20000)."""
from app.services.openrouter_integration_service import (
_generate_image_gen_configs,
)
@ -291,88 +289,3 @@ def test_generate_image_gen_configs_assigns_image_id_offset():
assert all(c["id"] < -20_000 + 1 for c in cfgs)
assert all(c["id"] > -29_000_000 for c in cfgs)
def test_generate_vision_llm_configs_filters_by_image_input_text_output():
"""Vision LLMs must accept image input AND emit text — pure image-gen
(no text out) and text-only (no image in) models are excluded.
"""
from app.services.openrouter_integration_service import (
_generate_vision_llm_configs,
)
raw = [
# GPT-4o: vision LLM (image in, text out) — must emit.
{
"id": "openai/gpt-4o",
"architecture": {
"input_modalities": ["text", "image"],
"output_modalities": ["text"],
},
"context_length": 128_000,
"pricing": {"prompt": "0.000005", "completion": "0.000015"},
},
# Pure image generator — image *output*, no text out. Must NOT emit.
{
"id": "openai/gpt-image-1",
"architecture": {
"input_modalities": ["text"],
"output_modalities": ["image"],
},
"context_length": 4_000,
"pricing": {"prompt": "0", "completion": "0"},
},
# Pure text model (no image in). Must NOT emit.
{
"id": "anthropic/claude-3-haiku",
"architecture": {
"input_modalities": ["text"],
"output_modalities": ["text"],
},
"context_length": 200_000,
"pricing": {"prompt": "0.000001", "completion": "0.000005"},
},
]
cfgs = _generate_vision_llm_configs(raw, dict(_SETTINGS_BASE))
names = {c["model_name"] for c in cfgs}
assert names == {"openai/gpt-4o"}
cfg = cfgs[0]
assert cfg["billing_tier"] == "premium"
# Pricing carried inline so pricing_registration can register vision
# under ``openrouter/openai/gpt-4o`` even if the chat catalogue cache
# is cleared.
assert cfg["input_cost_per_token"] == pytest.approx(5e-6)
assert cfg["output_cost_per_token"] == pytest.approx(15e-6)
assert cfg[_OPENROUTER_DYNAMIC_MARKER] is True
# Emit the OpenRouter base URL at source so every call path passes an
# explicit api_base and cannot inherit a process-global endpoint.
assert cfg["api_base"] == "https://openrouter.ai/api/v1"
def test_generate_vision_llm_configs_drops_chat_only_filters():
"""A small-context vision model that doesn't advertise tool calling is
still a valid vision LLM for "describe this image" prompts. The chat
filters (``supports_tool_calling``, ``has_sufficient_context``) must
NOT be applied to vision emission.
"""
from app.services.openrouter_integration_service import (
_generate_vision_llm_configs,
)
raw = [
{
"id": "tiny/vision-mini",
"architecture": {
"input_modalities": ["text", "image"],
"output_modalities": ["text"],
},
"supported_parameters": [], # no tools
"context_length": 4_000, # well below MIN_CONTEXT_LENGTH
"pricing": {"prompt": "0.0000001", "completion": "0.0000005"},
}
]
cfgs = _generate_vision_llm_configs(raw, dict(_SETTINGS_BASE))
assert len(cfgs) == 1
assert cfgs[0]["model_name"] == "tiny/vision-mini"

View file

@ -370,77 +370,3 @@ def test_register_continues_after_individual_failure(monkeypatch, caplog):
assert any("custom-deployment" in payload for payload in successful_calls)
def test_vision_configs_registered_with_chat_shape(monkeypatch):
"""``register_pricing_from_global_configs`` walks
``GLOBAL_VISION_LLM_CONFIGS`` in addition to the chat configs so vision
calls (during indexing) bill correctly. Vision configs use the same
chat-shape token prices, but image-gen pricing is intentionally NOT
registered here (handled via ``response_cost`` in LiteLLM).
"""
from app.config import config
from app.services.pricing_registration import register_pricing_from_global_configs
spy = _patch_register(monkeypatch)
_patch_openrouter_pricing(
monkeypatch,
{"openai/gpt-4o": {"prompt": "0.000005", "completion": "0.000015"}},
)
# No chat configs — only vision. Proves the vision walk is a separate
# iteration, not piggy-backed on the chat list.
monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [])
monkeypatch.setattr(
config,
"GLOBAL_VISION_LLM_CONFIGS",
[
{
"id": -1,
"litellm_provider": "openrouter",
"model_name": "openai/gpt-4o",
"billing_tier": "premium",
"input_cost_per_token": 5e-6,
"output_cost_per_token": 15e-6,
}
],
)
register_pricing_from_global_configs()
assert "openrouter/openai/gpt-4o" in spy.all_keys
payload_value = spy.calls[0]["openrouter/openai/gpt-4o"]
assert payload_value["mode"] == "chat"
assert payload_value["litellm_provider"] == "openrouter"
assert payload_value["input_cost_per_token"] == pytest.approx(5e-6)
assert payload_value["output_cost_per_token"] == pytest.approx(15e-6)
def test_vision_with_inline_pricing_when_or_cache_missing(monkeypatch):
"""If the OpenRouter pricing cache misses a vision model (different
catalogue surface), the vision walk falls back to inline
``input_cost_per_token``/``output_cost_per_token`` on the cfg itself.
"""
from app.config import config
from app.services.pricing_registration import register_pricing_from_global_configs
spy = _patch_register(monkeypatch)
_patch_openrouter_pricing(monkeypatch, {})
monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [])
monkeypatch.setattr(
config,
"GLOBAL_VISION_LLM_CONFIGS",
[
{
"id": -1,
"litellm_provider": "openrouter",
"model_name": "google/gemini-2.5-flash",
"billing_tier": "premium",
"input_cost_per_token": 1e-6,
"output_cost_per_token": 4e-6,
}
],
)
register_pricing_from_global_configs()
assert "openrouter/google/gemini-2.5-flash" in spy.all_keys

View file

@ -1,4 +1,4 @@
"""Unit tests for the Auto (Fastest) quality scoring module."""
"""Unit tests for the Auto quality scoring module."""
from __future__ import annotations

View file

@ -1,77 +0,0 @@
"""Vision LLM resolution must pass explicit per-config ``api_base``."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
pytestmark = pytest.mark.unit
@pytest.mark.asyncio
async def test_get_vision_llm_global_openrouter_sets_api_base():
"""Global negative-ID branch forwards the explicit OpenRouter base."""
from app.services import llm_service
cfg = {
"id": -30_001,
"name": "GPT-4o Vision (OpenRouter)",
"litellm_provider": "openrouter",
"model_name": "openai/gpt-4o",
"api_key": "sk-or-test",
"api_base": "https://openrouter.ai/api/v1",
"api_version": None,
"litellm_params": {},
"billing_tier": "free",
}
search_space = MagicMock()
search_space.id = 1
search_space.user_id = "user-x"
search_space.vision_llm_config_id = cfg["id"]
session = AsyncMock()
scalars = MagicMock()
scalars.first.return_value = search_space
result = MagicMock()
result.scalars.return_value = scalars
session.execute.return_value = result
captured: dict = {}
class FakeSanitized:
def __init__(self, **kwargs):
captured.update(kwargs)
with (
patch(
"app.services.vision_llm_router_service.get_global_vision_llm_config",
return_value=cfg,
),
patch(
"app.agents.chat.runtime.llm_config.SanitizedChatLiteLLM",
new=FakeSanitized,
),
):
await llm_service.get_vision_llm(session=session, search_space_id=1)
assert captured.get("api_base") == "https://openrouter.ai/api/v1"
assert captured["model"] == "openrouter/openai/gpt-4o"
def test_vision_router_deployment_sets_api_base_when_config_empty():
"""Auto-mode vision router carries explicit api_base into deployments."""
from app.services.vision_llm_router_service import VisionLLMRouterService
deployment = VisionLLMRouterService._config_to_deployment(
{
"model_name": "openai/gpt-4o",
"litellm_provider": "openrouter",
"api_key": "sk-or-test",
"api_base": "https://openrouter.ai/api/v1",
}
)
assert deployment is not None
assert deployment["litellm_params"]["api_base"] == "https://openrouter.ai/api/v1"
assert deployment["litellm_params"]["model"] == "openrouter/openai/gpt-4o"