Merge remote-tracking branch 'upstream/dev' into features/documents-injestion-layered-cached

This commit is contained in:
CREDO23 2026-06-14 11:30:33 +02:00
commit 32a6e54ce6
215 changed files with 9532 additions and 15405 deletions

View file

@ -19,7 +19,7 @@
# so the resolved auto-pin id is never sent to a real LLM provider.
# The values below only need to pass
# auto_model_pin_service._is_usable_global_config()
# which requires id / model_name / provider / api_key all truthy.
# which requires id / model_name / litellm_provider / api_key all truthy.
#
# Why TWO entries (premium + free):
# auto_model_pin_service.resolve_or_get_pinned_llm_config_id() splits
@ -44,9 +44,10 @@ global_llm_configs:
anonymous_enabled: false
seo_enabled: false
quality_score: 1.0
provider: "OPENAI"
litellm_provider: "openai"
model_name: "fake-e2e-model-premium"
api_key: "fake-e2e-api-key-not-for-production"
api_base: "https://api.openai.com/v1"
supports_image_input: false
quota_reserve_tokens: 1024
rpm: 1000
@ -60,9 +61,10 @@ global_llm_configs:
anonymous_enabled: false
seo_enabled: false
quality_score: 1.0
provider: "OPENAI"
litellm_provider: "openai"
model_name: "fake-e2e-model-free"
api_key: "fake-e2e-api-key-not-for-production"
api_base: "https://api.openai.com/v1"
supports_image_input: false
quota_reserve_tokens: 1024
rpm: 1000

View file

@ -0,0 +1,39 @@
"""Regression tests for model-boundary message sanitization."""
from __future__ import annotations
import pytest
from langchain_core.messages import AIMessage
from app.agents.chat.runtime.llm_config import _sanitize_messages
pytestmark = pytest.mark.unit
def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
    """Thinking blocks are removed from the sanitized copy, not from the input.

    The sanitized message's content is expected to be the plain text of the
    remaining ``text`` block, while the message passed in keeps its original
    block list.
    """
    thinking_block = {"type": "thinking", "thinking": "private reasoning"}
    text_block = {"type": "text", "text": "visible answer"}
    # Hand the message its own copies so the expected blocks stay pristine.
    original = AIMessage(content=[dict(thinking_block), dict(text_block)])

    result = _sanitize_messages([original])

    assert result[0].content == "visible answer"
    assert original.content == [thinking_block, text_block]
def test_sanitize_messages_sets_tool_only_ai_content_to_none() -> None:
    """A tool-call-only AI message gets ``content=None`` in the sanitized copy."""
    search_call = {"name": "search", "args": {"q": "x"}, "id": "call_1"}
    tool_only = AIMessage(content="", tool_calls=[search_call])

    result = _sanitize_messages([tool_only])

    assert result[0].content is None
    # The message passed in must keep its original empty-string content.
    assert tool_only.content == ""

View file

@ -1,6 +1,6 @@
"""Lock the runtime model-policy backstop in ``build_dependencies``.
Automations resolve their LLM from the *captured* ``agent_llm_id`` snapshot (so
Automations resolve their LLM from the *captured* ``chat_model_id`` snapshot (so
runs are insulated from later chat/search-space model changes), and the model
policy is re-checked at run time so a captured model that is no longer billable
fails the run clearly. When no snapshot is present, resolution falls back to the
@ -45,10 +45,10 @@ def patched_side_effects(monkeypatch: pytest.MonkeyPatch):
return None
async def test_build_dependencies_resolves_captured_agent_llm_id(
async def test_build_dependencies_resolves_captured_chat_model_id(
monkeypatch: pytest.MonkeyPatch, patched_side_effects
) -> None:
"""The bundle loads with the *captured* ``agent_llm_id``, not the live search space."""
"""The bundle loads with the *captured* ``chat_model_id``, not the live search space."""
captured: dict[str, Any] = {}
async def _fake_load(_session, *, config_id, search_space_id):
@ -67,13 +67,13 @@ async def test_build_dependencies_resolves_captured_agent_llm_id(
lambda _ss: pytest.fail("search-space policy should not run on captured path"),
)
search_space = SimpleNamespace(agent_llm_id=-99)
search_space = SimpleNamespace(chat_model_id=-99)
result = await build_dependencies(
session=_FakeSession(search_space),
search_space_id=42,
agent_llm_id=-7,
image_generation_config_id=5,
vision_llm_config_id=-1,
chat_model_id=-7,
image_gen_model_id=5,
vision_model_id=-1,
)
assert captured == {"config_id": -7, "search_space_id": 42}
@ -98,17 +98,17 @@ async def test_build_dependencies_validates_captured_ids(
monkeypatch.setattr(deps_mod, "load_llm_bundle", _fake_load)
await build_dependencies(
session=_FakeSession(SimpleNamespace(agent_llm_id=0)),
session=_FakeSession(SimpleNamespace(chat_model_id=0)),
search_space_id=42,
agent_llm_id=-7,
image_generation_config_id=5,
vision_llm_config_id=-1,
chat_model_id=-7,
image_gen_model_id=5,
vision_model_id=-1,
)
assert seen == {
"agent_llm_id": -7,
"image_generation_config_id": 5,
"vision_llm_config_id": -1,
"chat_model_id": -7,
"image_gen_model_id": 5,
"vision_model_id": -1,
}
@ -119,7 +119,7 @@ async def test_build_dependencies_raises_on_captured_policy_violation(
def _raise(**_kw):
raise AutomationModelPolicyError(
[{"kind": "image", "config_id": -2, "reason": "free model"}]
[{"kind": "image", "model_id": -2, "reason": "free model"}]
)
monkeypatch.setattr(deps_mod, "assert_models_billable", _raise)
@ -131,11 +131,11 @@ async def test_build_dependencies_raises_on_captured_policy_violation(
with pytest.raises(DependencyError):
await build_dependencies(
session=_FakeSession(SimpleNamespace(agent_llm_id=-7)),
session=_FakeSession(SimpleNamespace(chat_model_id=-7)),
search_space_id=42,
agent_llm_id=-7,
image_generation_config_id=-2,
vision_llm_config_id=-1,
chat_model_id=-7,
image_gen_model_id=-2,
vision_model_id=-1,
)
@ -157,7 +157,7 @@ async def test_build_dependencies_falls_back_to_search_space(
lambda **_kw: pytest.fail("captured policy should not run on fallback path"),
)
search_space = SimpleNamespace(agent_llm_id=-7)
search_space = SimpleNamespace(chat_model_id=-7)
result = await build_dependencies(
session=_FakeSession(search_space), search_space_id=42
)

View file

@ -28,9 +28,9 @@ def _run() -> SimpleNamespace:
def test_build_action_ctx_propagates_captured_models() -> None:
"""``definition.models`` flows onto the ActionContext model fields."""
models = AutomationModels(
agent_llm_id=-1,
image_generation_config_id=5,
vision_llm_config_id=-1,
chat_model_id=-1,
image_gen_model_id=5,
vision_model_id=-1,
)
ctx = _build_action_ctx(
cast(AsyncSession, None),
@ -40,9 +40,9 @@ def test_build_action_ctx_propagates_captured_models() -> None:
)
assert ctx.search_space_id == 42
assert ctx.agent_llm_id == -1
assert ctx.image_generation_config_id == 5
assert ctx.vision_llm_config_id == -1
assert ctx.chat_model_id == -1
assert ctx.image_gen_model_id == 5
assert ctx.vision_model_id == -1
def test_build_action_ctx_none_models_leaves_fields_none() -> None:
@ -54,6 +54,6 @@ def test_build_action_ctx_none_models_leaves_fields_none() -> None:
None,
)
assert ctx.agent_llm_id is None
assert ctx.image_generation_config_id is None
assert ctx.vision_llm_config_id is None
assert ctx.chat_model_id is None
assert ctx.image_gen_model_id is None
assert ctx.vision_model_id is None

View file

@ -40,24 +40,24 @@ def test_automation_definition_models_round_trip() -> None:
name="Daily digest",
plan=[PlanStep(step_id="s1", action="agent_task")],
models=AutomationModels(
agent_llm_id=-1,
image_generation_config_id=5,
vision_llm_config_id=-1,
chat_model_id=-1,
image_gen_model_id=5,
vision_model_id=-1,
),
)
dumped = definition.model_dump(mode="json", by_alias=True)
assert dumped["models"] == {
"agent_llm_id": -1,
"image_generation_config_id": 5,
"vision_llm_config_id": -1,
"chat_model_id": -1,
"image_gen_model_id": 5,
"vision_model_id": -1,
}
restored = AutomationDefinition.model_validate(dumped)
assert restored.models is not None
assert restored.models.agent_llm_id == -1
assert restored.models.image_generation_config_id == 5
assert restored.models.vision_llm_config_id == -1
assert restored.models.chat_model_id == -1
assert restored.models.image_gen_model_id == 5
assert restored.models.vision_model_id == -1
def test_automation_definition_rejects_unknown_top_level_field() -> None:

View file

@ -64,12 +64,12 @@ async def test_assert_models_billable_raises_422_on_violation(
def _raise(_ss):
raise AutomationModelPolicyError(
[{"kind": "llm", "config_id": 0, "reason": "Auto mode"}]
[{"kind": "llm", "model_id": 0, "reason": "Auto mode"}]
)
monkeypatch.setattr(automation_mod, "assert_automation_models_billable", _raise)
service = _service(SimpleNamespace(agent_llm_id=0))
service = _service(SimpleNamespace(chat_model_id=0))
with pytest.raises(HTTPException) as exc_info:
await service._assert_models_billable(1)
@ -99,7 +99,7 @@ async def test_assert_models_billable_returns_search_space_when_ok(
automation_mod, "assert_automation_models_billable", lambda _ss: None
)
search_space = SimpleNamespace(agent_llm_id=-1)
search_space = SimpleNamespace(chat_model_id=-1)
service = _service(search_space)
assert await service._assert_models_billable(1) is search_space
@ -123,9 +123,9 @@ async def test_create_injects_captured_models_from_search_space(
monkeypatch.setattr(AutomationService, "_get_with_triggers_or_raise", _return_added)
search_space = SimpleNamespace(
agent_llm_id=-1,
image_generation_config_id=5,
vision_llm_config_id=-1,
chat_model_id=-1,
image_gen_model_id=5,
vision_model_id=-1,
)
service = _service(search_space)
payload = AutomationCreate(
@ -137,9 +137,9 @@ async def test_create_injects_captured_models_from_search_space(
automation = await service.create(payload)
assert automation.definition["models"] == {
"agent_llm_id": -1,
"image_generation_config_id": 5,
"vision_llm_config_id": -1,
"chat_model_id": -1,
"image_gen_model_id": 5,
"vision_model_id": -1,
}
@ -162,9 +162,9 @@ async def test_create_treats_unset_prefs_as_auto_zero(
monkeypatch.setattr(AutomationService, "_get_with_triggers_or_raise", _return_added)
search_space = SimpleNamespace(
agent_llm_id=None,
image_generation_config_id=None,
vision_llm_config_id=None,
chat_model_id=None,
image_gen_model_id=None,
vision_model_id=None,
)
service = _service(search_space)
payload = AutomationCreate(search_space_id=1, name="A", definition=_definition())
@ -172,9 +172,9 @@ async def test_create_treats_unset_prefs_as_auto_zero(
automation = await service.create(payload)
assert automation.definition["models"] == {
"agent_llm_id": 0,
"image_generation_config_id": 0,
"vision_llm_config_id": 0,
"chat_model_id": 0,
"image_gen_model_id": 0,
"vision_model_id": 0,
}
@ -195,11 +195,11 @@ async def test_create_honors_selected_models_when_provided(
)
validated: dict[str, Any] = {}
def _assert_ok(*, agent_llm_id, image_generation_config_id, vision_llm_config_id):
def _assert_ok(*, chat_model_id, image_gen_model_id, vision_model_id):
validated["ids"] = (
agent_llm_id,
image_generation_config_id,
vision_llm_config_id,
chat_model_id,
image_gen_model_id,
vision_model_id,
)
monkeypatch.setattr(automation_mod, "assert_models_billable", _assert_ok)
@ -213,15 +213,15 @@ async def test_create_honors_selected_models_when_provided(
monkeypatch.setattr(AutomationService, "_authorize", _noop_authorize)
monkeypatch.setattr(AutomationService, "_get_with_triggers_or_raise", _return_added)
service = _service(SimpleNamespace(agent_llm_id=-99))
service = _service(SimpleNamespace(chat_model_id=-99))
payload = AutomationCreate(
search_space_id=1,
name="A",
definition=_definition(
models=AutomationModels(
agent_llm_id=-1,
image_generation_config_id=7,
vision_llm_config_id=-2,
chat_model_id=-1,
image_gen_model_id=7,
vision_model_id=-2,
)
),
)
@ -230,9 +230,9 @@ async def test_create_honors_selected_models_when_provided(
assert validated["ids"] == (-1, 7, -2)
assert automation.definition["models"] == {
"agent_llm_id": -1,
"image_generation_config_id": 7,
"vision_llm_config_id": -2,
"chat_model_id": -1,
"image_gen_model_id": 7,
"vision_model_id": -2,
}
@ -241,9 +241,9 @@ async def test_create_rejects_unbillable_selected_models(
) -> None:
"""A non-billable explicit selection maps the policy error to HTTP 422."""
def _raise(*, agent_llm_id, image_generation_config_id, vision_llm_config_id):
def _raise(*, chat_model_id, image_gen_model_id, vision_model_id):
raise AutomationModelPolicyError(
[{"kind": "llm", "config_id": -3, "reason": "free model"}]
[{"kind": "llm", "model_id": -3, "reason": "free model"}]
)
monkeypatch.setattr(automation_mod, "assert_models_billable", _raise)
@ -253,15 +253,15 @@ async def test_create_rejects_unbillable_selected_models(
monkeypatch.setattr(AutomationService, "_authorize", _noop_authorize)
service = _service(SimpleNamespace(agent_llm_id=-3))
service = _service(SimpleNamespace(chat_model_id=-3))
payload = AutomationCreate(
search_space_id=1,
name="A",
definition=_definition(
models=AutomationModels(
agent_llm_id=-3,
image_generation_config_id=7,
vision_llm_config_id=-2,
chat_model_id=-3,
image_gen_model_id=7,
vision_model_id=-2,
)
),
)
@ -277,9 +277,9 @@ async def test_update_preserves_captured_models(
) -> None:
"""A definition edit carries over the previously captured ``models``."""
captured = {
"agent_llm_id": -1,
"image_generation_config_id": 5,
"vision_llm_config_id": -1,
"chat_model_id": -1,
"image_gen_model_id": 5,
"vision_model_id": -1,
}
existing = SimpleNamespace(
search_space_id=1,
@ -318,20 +318,20 @@ async def test_update_honors_changed_models_when_valid(
"name": "A",
"plan": [],
"models": {
"agent_llm_id": -1,
"image_generation_config_id": 5,
"vision_llm_config_id": -1,
"chat_model_id": -1,
"image_gen_model_id": 5,
"vision_model_id": -1,
},
},
version=3,
)
validated: dict[str, Any] = {}
def _assert_ok(*, agent_llm_id, image_generation_config_id, vision_llm_config_id):
def _assert_ok(*, chat_model_id, image_gen_model_id, vision_model_id):
validated["ids"] = (
agent_llm_id,
image_generation_config_id,
vision_llm_config_id,
chat_model_id,
image_gen_model_id,
vision_model_id,
)
monkeypatch.setattr(automation_mod, "assert_models_billable", _assert_ok)
@ -351,9 +351,9 @@ async def test_update_honors_changed_models_when_valid(
patch = AutomationUpdate(
definition=_definition(
models=AutomationModels(
agent_llm_id=-2,
image_generation_config_id=9,
vision_llm_config_id=-2,
chat_model_id=-2,
image_gen_model_id=9,
vision_model_id=-2,
)
)
)
@ -362,9 +362,9 @@ async def test_update_honors_changed_models_when_valid(
assert validated["ids"] == (-2, 9, -2)
assert result.definition["models"] == {
"agent_llm_id": -2,
"image_generation_config_id": 9,
"vision_llm_config_id": -2,
"chat_model_id": -2,
"image_gen_model_id": 9,
"vision_model_id": -2,
}
assert result.version == 4
@ -379,17 +379,17 @@ async def test_update_rejects_changed_unbillable_models(
"name": "A",
"plan": [],
"models": {
"agent_llm_id": -1,
"image_generation_config_id": 5,
"vision_llm_config_id": -1,
"chat_model_id": -1,
"image_gen_model_id": 5,
"vision_model_id": -1,
},
},
version=3,
)
def _raise(*, agent_llm_id, image_generation_config_id, vision_llm_config_id):
def _raise(*, chat_model_id, image_gen_model_id, vision_model_id):
raise AutomationModelPolicyError(
[{"kind": "llm", "config_id": -7, "reason": "free model"}]
[{"kind": "llm", "model_id": -7, "reason": "free model"}]
)
monkeypatch.setattr(automation_mod, "assert_models_billable", _raise)
@ -409,9 +409,9 @@ async def test_update_rejects_changed_unbillable_models(
patch = AutomationUpdate(
definition=_definition(
models=AutomationModels(
agent_llm_id=-7,
image_generation_config_id=5,
vision_llm_config_id=-1,
chat_model_id=-7,
image_gen_model_id=5,
vision_model_id=-1,
)
)
)
@ -431,9 +431,9 @@ async def test_update_keeps_unchanged_models_without_revalidation(
premium without an unrelated edit tripping the policy check.
"""
captured = {
"agent_llm_id": -1,
"image_generation_config_id": 5,
"vision_llm_config_id": -1,
"chat_model_id": -1,
"image_gen_model_id": 5,
"vision_model_id": -1,
}
existing = SimpleNamespace(
search_space_id=1,
@ -485,7 +485,7 @@ async def test_model_eligibility_authorizes_and_returns_payload(
lambda _ss: {"allowed": False, "violations": [{"kind": "image"}]},
)
service = _service(SimpleNamespace(agent_llm_id=-2))
service = _service(SimpleNamespace(chat_model_id=-2))
result = await service.model_eligibility(search_space_id=5)
assert result == {"allowed": False, "violations": [{"kind": "image"}]}

View file

@ -27,9 +27,9 @@ pytestmark = pytest.mark.unit
def _search_space(*, llm: int | None, image: int | None, vision: int | None):
"""Minimal stand-in for the ``SearchSpace`` ORM row the policy reads."""
return SimpleNamespace(
agent_llm_id=llm,
image_generation_config_id=image,
vision_llm_config_id=vision,
chat_model_id=llm,
image_gen_model_id=image,
vision_model_id=vision,
)
@ -39,29 +39,11 @@ def patched_globals(monkeypatch: pytest.MonkeyPatch):
Negative ids: -1 is premium, -2 is free, for each of llm/image/vision.
"""
llm_configs = {
-1: {"id": -1, "billing_tier": "premium"},
-2: {"id": -2, "billing_tier": "free"},
}
monkeypatch.setattr(
"app.agents.chat.runtime.llm_config.load_global_llm_config_by_id",
lambda cid: llm_configs.get(cid),
)
from app.config import config as app_config
monkeypatch.setattr(
app_config,
"GLOBAL_IMAGE_GEN_CONFIGS",
[
{"id": -1, "billing_tier": "premium"},
{"id": -2, "billing_tier": "free"},
],
raising=False,
)
monkeypatch.setattr(
app_config,
"GLOBAL_VISION_LLM_CONFIGS",
"GLOBAL_MODELS",
[
{"id": -1, "billing_tier": "premium"},
{"id": -2, "billing_tier": "free"},
@ -71,7 +53,7 @@ def patched_globals(monkeypatch: pytest.MonkeyPatch):
return None
@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
def test_byok_positive_id_is_allowed(kind: str, patched_globals) -> None:
"""A positive config id is a user-owned BYOK model — always billable."""
allowed, reason = model_policy._classify(kind, 7)
@ -79,7 +61,7 @@ def test_byok_positive_id_is_allowed(kind: str, patched_globals) -> None:
assert reason == ""
@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
@pytest.mark.parametrize("config_id", [0, None])
def test_auto_mode_is_blocked(kind: str, config_id, patched_globals) -> None:
"""Auto mode (id 0) and an unset slot (None) are blocked."""
@ -88,7 +70,7 @@ def test_auto_mode_is_blocked(kind: str, config_id, patched_globals) -> None:
assert "Auto mode" in reason
@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
def test_premium_global_is_allowed(kind: str, patched_globals) -> None:
"""A negative (global) id with premium billing tier is allowed."""
allowed, reason = model_policy._classify(kind, -1)
@ -96,7 +78,7 @@ def test_premium_global_is_allowed(kind: str, patched_globals) -> None:
assert reason == ""
@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
def test_free_global_is_blocked(kind: str, patched_globals) -> None:
"""A negative (global) id with a free billing tier is blocked."""
allowed, reason = model_policy._classify(kind, -2)
@ -104,7 +86,7 @@ def test_free_global_is_blocked(kind: str, patched_globals) -> None:
assert "free model" in reason
@pytest.mark.parametrize("kind", ["llm", "image", "vision"])
@pytest.mark.parametrize("kind", ["chat", "image", "vision"])
def test_unknown_global_id_is_blocked(kind: str, patched_globals) -> None:
"""A negative id that resolves to no config is treated as not premium."""
allowed, _ = model_policy._classify(kind, -999)
@ -125,10 +107,10 @@ def test_eligibility_reports_each_violation(patched_globals) -> None:
assert result["allowed"] is False
kinds = {v["kind"] for v in result["violations"]}
assert kinds == {"llm", "image", "vision"}
# config_id is echoed back for the UI / settings deep-link.
by_kind = {v["kind"]: v["config_id"] for v in result["violations"]}
assert by_kind == {"llm": -2, "image": 0, "vision": -2}
assert kinds == {"chat", "image", "vision"}
# model_id is echoed back for the UI / settings deep-link.
by_kind = {v["kind"]: v["model_id"] for v in result["violations"]}
assert by_kind == {"chat": -2, "image": 0, "vision": -2}
def test_assert_raises_with_violations(patched_globals) -> None:
@ -138,7 +120,7 @@ def test_assert_raises_with_violations(patched_globals) -> None:
assert_automation_models_billable(search_space)
assert len(exc_info.value.violations) == 1
assert exc_info.value.violations[0]["kind"] == "llm"
assert exc_info.value.violations[0]["kind"] == "chat"
def test_assert_passes_when_all_billable(patched_globals) -> None:
@ -153,7 +135,7 @@ def test_assert_passes_when_all_billable(patched_globals) -> None:
def test_get_model_eligibility_all_billable(patched_globals) -> None:
"""Premium LLM + BYOK image + premium vision (explicit ids) → allowed."""
result = get_model_eligibility(
agent_llm_id=-1, image_generation_config_id=5, vision_llm_config_id=-1
chat_model_id=-1, image_gen_model_id=5, vision_model_id=-1
)
assert result == {"allowed": True, "violations": []}
@ -161,28 +143,28 @@ def test_get_model_eligibility_all_billable(patched_globals) -> None:
def test_get_model_eligibility_reports_each_violation(patched_globals) -> None:
"""Free LLM, Auto image, free vision (explicit ids) each produce a violation."""
result = get_model_eligibility(
agent_llm_id=-2, image_generation_config_id=0, vision_llm_config_id=-2
chat_model_id=-2, image_gen_model_id=0, vision_model_id=-2
)
assert result["allowed"] is False
by_kind = {v["kind"]: v["config_id"] for v in result["violations"]}
assert by_kind == {"llm": -2, "image": 0, "vision": -2}
by_kind = {v["kind"]: v["model_id"] for v in result["violations"]}
assert by_kind == {"chat": -2, "image": 0, "vision": -2}
def test_assert_models_billable_raises(patched_globals) -> None:
"""``assert_models_billable`` raises when any explicit id is blocked."""
with pytest.raises(AutomationModelPolicyError) as exc_info:
assert_models_billable(
agent_llm_id=0, image_generation_config_id=5, vision_llm_config_id=-1
chat_model_id=0, image_gen_model_id=5, vision_model_id=-1
)
assert len(exc_info.value.violations) == 1
assert exc_info.value.violations[0]["kind"] == "llm"
assert exc_info.value.violations[0]["kind"] == "chat"
def test_assert_models_billable_passes(patched_globals) -> None:
"""No exception when every explicit id is premium or BYOK."""
assert (
assert_models_billable(
agent_llm_id=3, image_generation_config_id=-1, vision_llm_config_id=4
chat_model_id=3, image_gen_model_id=-1, vision_model_id=4
)
is None
)
@ -192,5 +174,5 @@ def test_search_space_wrapper_delegates_to_core(patched_globals) -> None:
"""The search-space wrapper produces the same result as the ID core."""
search_space = _search_space(llm=-2, image=0, vision=-2)
assert get_automation_model_eligibility(search_space) == get_model_eligibility(
agent_llm_id=-2, image_generation_config_id=0, vision_llm_config_id=-2
chat_model_id=-2, image_gen_model_id=0, vision_model_id=-2
)

View file

@ -1,110 +0,0 @@
"""Unit tests for ``supports_image_input`` derivation on BYOK chat config
endpoints (``GET /new-llm-configs`` list, ``GET /new-llm-configs/{id}``).
There is no DB column for ``supports_image_input`` on
``NewLLMConfig`` — the value is resolved at the API boundary by
``derive_supports_image_input`` so the new-chat selector / streaming
task can read the same field shape regardless of source (BYOK vs YAML
vs OpenRouter dynamic). Default-allow on unknown so we don't lock the
user out of their own model choice.
"""
from __future__ import annotations
from datetime import UTC, datetime
from types import SimpleNamespace
from uuid import uuid4
import pytest
from app.db import LiteLLMProvider
from app.routes import new_llm_config_routes
pytestmark = pytest.mark.unit
def _byok_row(
    *,
    id_: int,
    model_name: str,
    base_model: str | None = None,
    provider: LiteLLMProvider = LiteLLMProvider.OPENAI,
    custom_provider: str | None = None,
) -> object:
    """Build a stand-in for a BYOK config row.

    The result mimics the SQLAlchemy row's attribute surface;
    ``model_validate`` walks ``from_attributes=True``, so a
    ``SimpleNamespace`` is enough. ``provider`` is a real
    ``LiteLLMProvider`` enum value so Pydantic's enum validator accepts
    it — same as the ORM row would carry.

    ``litellm_params`` is ``{"base_model": base_model}`` when a truthy
    ``base_model`` is given and ``None`` otherwise.
    """
    litellm_params = {"base_model": base_model} if base_model else None
    attributes = {
        "id": id_,
        "name": f"BYOK-{id_}",
        "description": None,
        "provider": provider,
        "custom_provider": custom_provider,
        "model_name": model_name,
        "api_key": "sk-byok",
        "api_base": None,
        "litellm_params": litellm_params,
        "system_instructions": "",
        "use_default_system_instructions": True,
        "citations_enabled": True,
        "created_at": datetime.now(tz=UTC),
        "search_space_id": 42,
        "user_id": uuid4(),
    }
    return SimpleNamespace(**attributes)
def test_serialize_byok_known_vision_model_resolves_true():
    """A ``gpt-4o`` BYOK row serializes with ``supports_image_input=True``.

    The catalog resolver consults LiteLLM's map for ``gpt-4o`` -> True, and
    the serialized row carries that value through to the
    ``NewLLMConfigRead`` schema along with the row's id and model name.
    """
    config_row = _byok_row(id_=1, model_name="gpt-4o")

    result = new_llm_config_routes._serialize_byok_config(config_row)

    assert result.supports_image_input is True
    assert result.id == 1
    assert result.model_name == "gpt-4o"
def test_serialize_byok_unknown_model_default_allows():
    """An unknown / unmapped model is default-allowed.

    The streaming-task safety net is the actual block, and it requires
    LiteLLM to *explicitly* say text-only — so a brand new BYOK model
    should not be pre-judged.
    """
    row_kwargs = {
        "id_": 2,
        "model_name": "brand-new-model-x9-unmapped",
        "provider": LiteLLMProvider.CUSTOM,
        "custom_provider": "brand_new_proxy",
    }
    config_row = _byok_row(**row_kwargs)

    result = new_llm_config_routes._serialize_byok_config(config_row)

    assert result.supports_image_input is True
def test_serialize_byok_uses_base_model_when_present():
    """``base_model`` in ``litellm_params`` takes precedence over ``model_name``.

    Azure-style: ``model_name`` is the deployment id, while ``base_model``
    is the canonical sku LiteLLM knows. The helper must consult
    ``base_model`` first, or unrecognised deployment ids would shadow the
    real capability.
    """
    deployment_id = "my-azure-deployment-id-no-litellm-knows-this"
    config_row = _byok_row(
        id_=3,
        model_name=deployment_id,
        base_model="gpt-4o",
        provider=LiteLLMProvider.AZURE_OPENAI,
    )

    result = new_llm_config_routes._serialize_byok_config(config_row)

    assert result.supports_image_input is True
def test_serialize_byok_returns_pydantic_read_model():
    """The serializer returns a ``NewLLMConfigRead`` instance, not the raw row.

    Returning the Pydantic read model guarantees the schema additions are
    present in the API surface. This guards against a future regression
    where the augmentation step is removed in favour of ORM passthrough.
    """
    from app.schemas import NewLLMConfigRead

    config_row = _byok_row(id_=4, model_name="gpt-4o")

    result = new_llm_config_routes._serialize_byok_config(config_row)

    assert isinstance(result, NewLLMConfigRead)

View file

@ -1,184 +0,0 @@
"""Unit tests for ``is_premium`` derivation on the global image-gen and
vision-LLM list endpoints.
Chat globals (``GET /global-llm-configs``) already emit
``is_premium = (billing_tier == "premium")``. Image and vision did not,
which made the new-chat ``model-selector`` render the Free/Premium badge
on the Chat tab but skip it on the Image and Vision tabs (the selector
keys its badge logic off ``is_premium``). These tests pin parity:
* YAML free entry -> ``is_premium=False``
* YAML premium entry -> ``is_premium=True``
* OpenRouter dynamic premium entry -> ``is_premium=True``
* Auto stub (always emitted when at least one config is present) ->
``is_premium=False``
"""
from __future__ import annotations
import pytest
pytestmark = pytest.mark.unit
# Global image-generation configs used by the ``is_premium`` tests below:
# one free entry, one premium entry, and one premium OpenRouter entry.
_IMAGE_FIXTURE: list[dict] = [
    # Free-tier entry.
    {
        "id": -1,
        "name": "DALL-E 3",
        "provider": "OPENAI",
        "model_name": "dall-e-3",
        "api_key": "sk-test",
        "billing_tier": "free",
    },
    # Premium-tier entry.
    {
        "id": -2,
        "name": "GPT-Image 1 (premium)",
        "provider": "OPENAI",
        "model_name": "gpt-image-1",
        "api_key": "sk-test",
        "billing_tier": "premium",
    },
    # Premium-tier entry served through OpenRouter (the only one with api_base).
    {
        "id": -20_001,
        "name": "google/gemini-2.5-flash-image (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "google/gemini-2.5-flash-image",
        "api_key": "sk-or-test",
        "api_base": "https://openrouter.ai/api/v1",
        "billing_tier": "premium",
    },
]
# Global vision-LLM configs used by the ``is_premium`` tests below:
# one free entry, one premium entry, and one premium OpenRouter entry.
_VISION_FIXTURE: list[dict] = [
    # Free-tier entry.
    {
        "id": -1,
        "name": "GPT-4o Vision",
        "provider": "OPENAI",
        "model_name": "gpt-4o",
        "api_key": "sk-test",
        "billing_tier": "free",
    },
    # Premium-tier entry.
    {
        "id": -2,
        "name": "Claude 3.5 Sonnet (premium)",
        "provider": "ANTHROPIC",
        "model_name": "claude-3-5-sonnet",
        "api_key": "sk-ant-test",
        "billing_tier": "premium",
    },
    # Premium-tier entry served through OpenRouter (the only one with api_base).
    {
        "id": -30_001,
        "name": "openai/gpt-4o (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-4o",
        "api_key": "sk-or-test",
        "api_base": "https://openrouter.ai/api/v1",
        "billing_tier": "premium",
    },
]
# =============================================================================
# Image generation
# =============================================================================
@pytest.mark.asyncio
async def test_global_image_gen_configs_emit_is_premium(monkeypatch):
    """Every emitted image-gen config carries a boolean ``is_premium``.

    The flag is derived server-side from ``billing_tier``; the Auto stub
    (id 0) is always free.
    """
    from app.config import config
    from app.routes import image_generation_routes

    monkeypatch.setattr(
        config, "GLOBAL_IMAGE_GEN_CONFIGS", _IMAGE_FIXTURE, raising=False
    )

    payload = await image_generation_routes.get_global_image_gen_configs(user=None)
    configs_by_id = {entry["id"]: entry for entry in payload}

    # The Auto stub is always emitted when at least one global config exists,
    # and it must always declare itself free (Auto-mode billing-tier
    # surfacing is a separate follow-up).
    assert 0 in configs_by_id, (
        "Auto stub should be emitted when at least one config exists"
    )

    # id -> (is_premium, billing_tier), checked in this order:
    # Auto stub, YAML free, YAML premium, OpenRouter dynamic premium.
    expected = {
        0: (False, "free"),
        -1: (False, "free"),
        -2: (True, "premium"),
        -20_001: (True, "premium"),
    }
    for config_id, (premium_flag, tier) in expected.items():
        assert configs_by_id[config_id]["is_premium"] is premium_flag
        assert configs_by_id[config_id]["billing_tier"] == tier

    # Every emitted dict (including Auto) must have the field — never missing.
    for cfg in payload:
        assert "is_premium" in cfg, f"is_premium missing from {cfg.get('id')}"
        assert isinstance(cfg["is_premium"], bool)
@pytest.mark.asyncio
async def test_global_image_gen_configs_no_globals_no_auto_stub(monkeypatch):
    """With no global image-gen configs the endpoint returns an empty list.

    No Auto stub is emitted in that case — Auto mode would have nothing to
    route to.
    """
    from app.config import config
    from app.routes import image_generation_routes

    monkeypatch.setattr(config, "GLOBAL_IMAGE_GEN_CONFIGS", [], raising=False)

    emitted = await image_generation_routes.get_global_image_gen_configs(user=None)

    assert emitted == []
# =============================================================================
# Vision LLM
# =============================================================================
@pytest.mark.asyncio
async def test_global_vision_llm_configs_emit_is_premium(monkeypatch):
    """Every emitted vision-LLM config carries a boolean ``is_premium``.

    Checks the Auto stub (id 0) plus each ``_VISION_FIXTURE`` entry for the
    expected ``is_premium`` / ``billing_tier`` pair.
    """
    from app.config import config
    from app.routes import vision_llm_routes

    monkeypatch.setattr(
        config, "GLOBAL_VISION_LLM_CONFIGS", _VISION_FIXTURE, raising=False
    )

    payload = await vision_llm_routes.get_global_vision_llm_configs(user=None)
    configs_by_id = {entry["id"]: entry for entry in payload}

    assert 0 in configs_by_id, (
        "Auto stub should be emitted when at least one config exists"
    )

    # id -> (is_premium, billing_tier), checked in this order.
    expected = {
        0: (False, "free"),
        -1: (False, "free"),
        -2: (True, "premium"),
        -30_001: (True, "premium"),
    }
    for config_id, (premium_flag, tier) in expected.items():
        assert configs_by_id[config_id]["is_premium"] is premium_flag
        assert configs_by_id[config_id]["billing_tier"] == tier

    # The field must be present, and a real bool, on every emitted dict.
    for cfg in payload:
        assert "is_premium" in cfg, f"is_premium missing from {cfg.get('id')}"
        assert isinstance(cfg["is_premium"], bool)
@pytest.mark.asyncio
async def test_global_vision_llm_configs_no_globals_no_auto_stub(monkeypatch):
    """With no global vision-LLM configs the endpoint returns an empty list."""
    from app.config import config
    from app.routes import vision_llm_routes

    monkeypatch.setattr(config, "GLOBAL_VISION_LLM_CONFIGS", [], raising=False)

    emitted = await vision_llm_routes.get_global_vision_llm_configs(user=None)

    assert emitted == []

View file

@ -1,106 +0,0 @@
"""Unit tests for ``supports_image_input`` derivation on the chat global
config endpoint (``GET /global-new-llm-configs``).
Resolution order (matches ``new_llm_config_routes.get_global_new_llm_configs``):
1. Explicit ``supports_image_input`` on the cfg dict (set by the YAML
loader for operator overrides, or by the OpenRouter integration from
``architecture.input_modalities``) wins.
2. ``derive_supports_image_input`` helper — default-allow on unknown
models; returns False only when LiteLLM / OR modalities are definitive.
The flag is purely informational at the API boundary. The streaming
task safety net (``is_known_text_only_chat_model``) is the actual block,
and it requires LiteLLM to *explicitly* mark the model as text-only.
"""
from __future__ import annotations
import pytest
pytestmark = pytest.mark.unit
# Chat-config fixture covering the four shapes the endpoint test exercises:
#   -1       explicit ``supports_image_input: True``
#   -2       explicit ``supports_image_input: False``
#   -10_010  flag absent on a known model (``gpt-4o``)
#   -10_011  flag absent on an unmapped custom-provider model
_FIXTURE: list[dict] = [
    {
        "id": -1,
        "name": "GPT-4o (explicit true)",
        "description": "vision-capable, explicit YAML override",
        "provider": "OPENAI",
        "model_name": "gpt-4o",
        "api_key": "sk-test",
        "billing_tier": "free",
        "supports_image_input": True,
    },
    {
        "id": -2,
        "name": "DeepSeek V3 (explicit false)",
        "description": "OpenRouter dynamic — modality-derived false",
        "provider": "OPENROUTER",
        "model_name": "deepseek/deepseek-v3.2-exp",
        "api_key": "sk-or-test",
        "api_base": "https://openrouter.ai/api/v1",
        "billing_tier": "free",
        "supports_image_input": False,
    },
    {
        "id": -10_010,
        "name": "Unannotated GPT-4o",
        "description": "no flag set — resolver should derive True via LiteLLM",
        "provider": "OPENAI",
        "model_name": "gpt-4o",
        "api_key": "sk-test",
        "billing_tier": "free",
        # supports_image_input intentionally absent
    },
    {
        "id": -10_011,
        "name": "Unannotated unknown model",
        "description": "unmapped — default-allow True",
        "provider": "CUSTOM",
        "custom_provider": "brand_new_proxy",
        "model_name": "brand-new-model-x9",
        "api_key": "sk-test",
        "billing_tier": "free",
    },
]
@pytest.mark.asyncio
async def test_global_new_llm_configs_emit_supports_image_input(monkeypatch):
    """Every emitted chat config exposes ``supports_image_input`` as a bool.

    Explicit fixture values must be preserved; entries without the flag
    are expected to come back as True.
    """
    from app.config import config
    from app.routes import new_llm_config_routes as routes

    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", _FIXTURE, raising=False)

    payload = await routes.get_global_new_llm_configs(user=None)
    by_id = {entry["id"]: entry for entry in payload}

    # Auto stub: optimistic True so the user can keep Auto selected with
    # vision-capable deployments somewhere in the pool.
    assert 0 in by_id, "Auto stub should be emitted when configs exist"
    auto_stub = by_id[0]
    assert auto_stub["supports_image_input"] is True
    assert auto_stub["is_auto_mode"] is True

    # (config id, expected flag), checked in fixture order:
    #   -1       explicit True is preserved
    #   -2       explicit False is preserved (the exact failure mode the
    #            safety net guards against — DeepSeek V3 over OpenRouter
    #            would 404 with "No endpoints found that support image
    #            input")
    #   -10_010  unannotated GPT-4o: resolver consults LiteLLM, which
    #            says vision
    #   -10_011  unknown / unmapped model: default-allow rather than
    #            pre-judge
    expectations = (
        (-1, True),
        (-2, False),
        (-10_010, True),
        (-10_011, True),
    )
    for cfg_id, flag in expectations:
        assert by_id[cfg_id]["supports_image_input"] is flag

    # The flag must exist and be a real bool on every emitted config.
    for cfg in payload:
        assert "supports_image_input" in cfg, (
            f"supports_image_input missing from {cfg.get('id')}"
        )
        assert isinstance(cfg["supports_image_input"], bool)

View file

@ -27,9 +27,18 @@ async def test_resolve_billing_for_auto_mode(monkeypatch):
from app.routes import image_generation_routes
from app.services.billable_calls import DEFAULT_IMAGE_RESERVE_MICROS
search_space = SimpleNamespace(image_generation_config_id=None)
async def _no_auto_candidates(*_args, **_kwargs):
return []
monkeypatch.setattr(
image_generation_routes,
"auto_model_candidates",
_no_auto_candidates,
)
search_space = SimpleNamespace(id=1, user_id=None, image_gen_model_id=None)
tier, model, reserve = await image_generation_routes._resolve_billing_for_image_gen(
session=None, # Not consumed on this code path.
session=None,
config_id=0, # IMAGE_GEN_AUTO_MODE_ID
search_space=search_space,
)
@ -45,26 +54,48 @@ async def test_resolve_billing_for_premium_global_config(monkeypatch):
monkeypatch.setattr(
config,
"GLOBAL_IMAGE_GEN_CONFIGS",
"GLOBAL_MODELS",
[
{
"id": -1,
"provider": "OPENAI",
"model_name": "gpt-image-1",
"connection_id": -101,
"model_id": "gpt-image-1",
"billing_tier": "premium",
"quota_reserve_micros": 75_000,
"catalog": {"quota_reserve_micros": 75_000},
},
{
"id": -2,
"provider": "OPENROUTER",
"model_name": "google/gemini-2.5-flash-image",
"connection_id": -102,
"model_id": "google/gemini-2.5-flash-image",
"billing_tier": "free",
"catalog": {},
},
],
raising=False,
)
monkeypatch.setattr(
config,
"GLOBAL_CONNECTIONS",
[
{
"id": -101,
"provider": "openai",
"api_key": "sk-test",
"base_url": None,
"extra": {},
},
{
"id": -102,
"provider": "openrouter",
"api_key": "sk-or-test",
"base_url": "https://openrouter.ai/api/v1",
"extra": {},
},
],
raising=False,
)
search_space = SimpleNamespace(image_generation_config_id=None)
search_space = SimpleNamespace(id=1, user_id=None, image_gen_model_id=None)
# Premium with override.
tier, model, reserve = await image_generation_routes._resolve_billing_for_image_gen(
@ -94,7 +125,7 @@ async def test_resolve_billing_for_user_owned_byok_is_free():
from app.routes import image_generation_routes
from app.services.billable_calls import DEFAULT_IMAGE_RESERVE_MICROS
search_space = SimpleNamespace(image_generation_config_id=None)
search_space = SimpleNamespace(id=1, user_id=None, image_gen_model_id=None)
tier, model, reserve = await image_generation_routes._resolve_billing_for_image_gen(
session=None, config_id=42, search_space=search_space
)
@ -105,7 +136,7 @@ async def test_resolve_billing_for_user_owned_byok_is_free():
@pytest.mark.asyncio
async def test_resolve_billing_falls_back_to_search_space_default(monkeypatch):
"""When the request omits ``image_generation_config_id``, the helper
"""When the request omits ``image_gen_model_id``, the helper
must consult the search space's default — so a search space pinned
to a premium global config still gates new requests by quota.
"""
@ -114,19 +145,34 @@ async def test_resolve_billing_falls_back_to_search_space_default(monkeypatch):
monkeypatch.setattr(
config,
"GLOBAL_IMAGE_GEN_CONFIGS",
"GLOBAL_MODELS",
[
{
"id": -7,
"provider": "OPENAI",
"model_name": "gpt-image-1",
"connection_id": -101,
"model_id": "gpt-image-1",
"billing_tier": "premium",
"catalog": {},
}
],
raising=False,
)
monkeypatch.setattr(
config,
"GLOBAL_CONNECTIONS",
[
{
"id": -101,
"provider": "openai",
"api_key": "sk-test",
"base_url": None,
"extra": {},
}
],
raising=False,
)
search_space = SimpleNamespace(image_generation_config_id=-7)
search_space = SimpleNamespace(id=1, user_id=None, image_gen_model_id=-7)
(
tier,
model,

View file

@ -1,27 +1,4 @@
"""Unit tests for ``_resolve_agent_billing_for_search_space``.
Validates the resolver used by Celery podcast/video tasks to compute
``(owner_user_id, billing_tier, base_model)`` from a search space and its
agent LLM config. The resolver mirrors chat's billing-resolution pattern at
``stream_new_chat.py:2294-2351`` and is the single integration point that
prevents Auto-mode podcast/video from leaking premium credit.
Coverage:
* Auto mode + ``thread_id`` set, pin resolves to a negative-id premium
global returns ``("premium", <base_model>)``.
* Auto mode + ``thread_id`` set, pin resolves to a negative-id free
global returns ``("free", <base_model>)``.
* Auto mode + ``thread_id`` set, pin resolves to a positive-id BYOK config
always ``"free"``.
* Auto mode + ``thread_id=None`` fallback to ``("free", "auto")`` without
hitting the pin service.
* Negative id (no Auto) uses ``get_global_llm_config``'s
``billing_tier``.
* Positive id (user BYOK) always ``"free"``.
* Search space not found raises ``ValueError``.
* ``agent_llm_id`` is None raises ``ValueError``.
"""
"""Unit tests for ``_resolve_agent_billing_for_search_space``."""
from __future__ import annotations
@ -34,11 +11,6 @@ import pytest
pytestmark = pytest.mark.unit
# ---------------------------------------------------------------------------
# Fakes
# ---------------------------------------------------------------------------
class _FakeExecResult:
def __init__(self, obj):
self._obj = obj
@ -51,14 +23,6 @@ class _FakeExecResult:
class _FakeSession:
"""Tiny AsyncSession stub.
``responses`` is a list of objects to return from successive
``execute()`` calls (in order). The resolver makes at most two
``execute()`` calls (search-space lookup, then optionally NewLLMConfig
lookup), so two queued responses cover the matrix.
"""
def __init__(self, responses: list):
self._responses = list(responses)
@ -67,9 +31,6 @@ class _FakeSession:
return _FakeExecResult(None)
return _FakeExecResult(self._responses.pop(0))
async def commit(self) -> None:
pass
@dataclass
class _FakePinResolution:
@ -78,53 +39,33 @@ class _FakePinResolution:
from_existing_pin: bool = False
def _make_search_space(*, agent_llm_id: int | None, user_id: UUID) -> SimpleNamespace:
return SimpleNamespace(
id=42,
agent_llm_id=agent_llm_id,
user_id=user_id,
)
def _make_search_space(*, chat_model_id: int | None, user_id: UUID) -> SimpleNamespace:
return SimpleNamespace(id=42, chat_model_id=chat_model_id, user_id=user_id)
def _make_byok_config(
*, id_: int, base_model: str | None = None, model_name: str = "gpt-byok"
def _make_byok_model(
*, id_: int, base_model: str | None = None, model_id: str = "gpt-byok"
) -> SimpleNamespace:
return SimpleNamespace(
id=id_,
model_name=model_name,
litellm_params={"base_model": base_model} if base_model else {},
model_id=model_id,
catalog={"base_model": base_model} if base_model else {},
connection=SimpleNamespace(enabled=True, search_space_id=42, user_id=None),
)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
"""Auto + thread → pin service resolves to negative-id premium config →
resolver returns ``("premium", <base_model>)``."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])
# Mock the pin service to return a concrete premium config id.
async def _fake_resolve_pin(
sess,
*,
thread_id,
search_space_id,
user_id,
selected_llm_config_id,
force_repin_free=False,
):
assert selected_llm_config_id == 0
assert thread_id == 99
async def _fake_resolve_pin(*_args, **kwargs):
assert kwargs["selected_llm_config_id"] == 0
assert kwargs["thread_id"] == 99
return _FakePinResolution(resolved_llm_config_id=-1, resolved_tier="premium")
# Mock global config lookup to return a premium entry.
def _fake_get_global(cfg_id):
if cfg_id == -1:
return {
@ -135,8 +76,6 @@ async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
}
return None
# Lazy imports inside the resolver — patch the *target* modules so the
# imported names resolve to our fakes.
import app.services.auto_model_pin_service as pin_module
import app.services.llm_service as llm_module
@ -154,77 +93,18 @@ async def test_auto_mode_with_thread_id_resolves_to_premium_global(monkeypatch):
assert base_model == "gpt-5.4"
@pytest.mark.asyncio
async def test_auto_mode_with_thread_id_resolves_to_free_global(monkeypatch):
"""Auto + thread → pin returns negative-id free config → resolver
returns ``("free", <base_model>)``. Same path the pin service takes for
out-of-credit users (graceful degradation)."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
async def _fake_resolve_pin(
sess,
*,
thread_id,
search_space_id,
user_id,
selected_llm_config_id,
force_repin_free=False,
):
return _FakePinResolution(resolved_llm_config_id=-3, resolved_tier="free")
def _fake_get_global(cfg_id):
if cfg_id == -3:
return {
"id": -3,
"model_name": "openrouter/free-model",
"billing_tier": "free",
"litellm_params": {"base_model": "openrouter/free-model"},
}
return None
import app.services.auto_model_pin_service as pin_module
import app.services.llm_service as llm_module
monkeypatch.setattr(
pin_module, "resolve_or_get_pinned_llm_config_id", _fake_resolve_pin
)
monkeypatch.setattr(llm_module, "get_global_llm_config", _fake_get_global)
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42, thread_id=99
)
assert owner == user_id
assert tier == "free"
assert base_model == "openrouter/free-model"
@pytest.mark.asyncio
async def test_auto_mode_with_thread_id_resolves_to_byok_is_free(monkeypatch):
"""Auto + thread → pin returns positive-id BYOK config → resolver
returns ``("free", ...)`` (BYOK is always free per
``AgentConfig.from_new_llm_config``)."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
search_space = _make_search_space(agent_llm_id=0, user_id=user_id)
byok_cfg = _make_byok_config(
id_=17, base_model="anthropic/claude-3-haiku", model_name="my-claude"
search_space = _make_search_space(chat_model_id=0, user_id=user_id)
byok_model = _make_byok_model(
id_=17, base_model="anthropic/claude-3-haiku", model_id="my-claude"
)
session = _FakeSession([search_space, byok_cfg])
session = _FakeSession([search_space, byok_model])
async def _fake_resolve_pin(
sess,
*,
thread_id,
search_space_id,
user_id,
selected_llm_config_id,
force_repin_free=False,
):
async def _fake_resolve_pin(*_args, **_kwargs):
return _FakePinResolution(resolved_llm_config_id=17, resolved_tier="free")
import app.services.auto_model_pin_service as pin_module
@ -244,13 +124,10 @@ async def test_auto_mode_with_thread_id_resolves_to_byok_is_free(monkeypatch):
@pytest.mark.asyncio
async def test_auto_mode_without_thread_id_falls_back_to_free():
"""Auto + ``thread_id=None`` → ``("free", "auto")`` without invoking
the pin service. Forward-compat fallback for any future direct-API
entrypoint that doesn't have a chat thread."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42, thread_id=None
@ -263,13 +140,10 @@ async def test_auto_mode_without_thread_id_falls_back_to_free():
@pytest.mark.asyncio
async def test_auto_mode_pin_failure_falls_back_to_free(monkeypatch):
"""If the pin service raises ``ValueError`` (thread missing /
mismatched search space), the resolver should log and return free
rather than killing the whole task."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=0, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=0, user_id=user_id)])
async def _fake_resolve_pin(*args, **kwargs):
raise ValueError("thread missing")
@ -291,12 +165,10 @@ async def test_auto_mode_pin_failure_falls_back_to_free(monkeypatch):
@pytest.mark.asyncio
async def test_negative_id_premium_global_returns_premium(monkeypatch):
"""Explicit negative agent_llm_id → ``get_global_llm_config`` →
return its ``billing_tier``."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=-1, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=-1, user_id=user_id)])
def _fake_get_global(cfg_id):
return {
@ -319,50 +191,15 @@ async def test_negative_id_premium_global_returns_premium(monkeypatch):
assert base_model == "gpt-5.4"
@pytest.mark.asyncio
async def test_negative_id_free_global_returns_free(monkeypatch):
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=-2, user_id=user_id)])
def _fake_get_global(cfg_id):
return {
"id": cfg_id,
"model_name": "openrouter/some-free",
"billing_tier": "free",
"litellm_params": {"base_model": "openrouter/some-free"},
}
import app.services.llm_service as llm_module
monkeypatch.setattr(llm_module, "get_global_llm_config", _fake_get_global)
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42, thread_id=None
)
assert owner == user_id
assert tier == "free"
assert base_model == "openrouter/some-free"
@pytest.mark.asyncio
async def test_negative_id_missing_base_model_falls_back_to_model_name(monkeypatch):
"""When the global config has no ``litellm_params.base_model``, the
resolver falls back to ``model_name`` matching chat's behavior."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=-5, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=-5, user_id=user_id)])
def _fake_get_global(cfg_id):
return {
"id": cfg_id,
"model_name": "fallback-model",
"billing_tier": "premium",
# No litellm_params.
}
return {"id": cfg_id, "model_name": "fallback-model", "billing_tier": "premium"}
import app.services.llm_service as llm_module
@ -378,14 +215,12 @@ async def test_negative_id_missing_base_model_falls_back_to_model_name(monkeypat
@pytest.mark.asyncio
async def test_positive_id_byok_is_always_free():
"""Positive agent_llm_id → user-owned BYOK NewLLMConfig → always free,
regardless of underlying provider tier."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
search_space = _make_search_space(agent_llm_id=23, user_id=user_id)
byok_cfg = _make_byok_config(id_=23, base_model="anthropic/claude-3.5-sonnet")
session = _FakeSession([search_space, byok_cfg])
search_space = _make_search_space(chat_model_id=23, user_id=user_id)
byok_model = _make_byok_model(id_=23, base_model="anthropic/claude-3.5-sonnet")
session = _FakeSession([search_space, byok_model])
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42
@ -398,13 +233,10 @@ async def test_positive_id_byok_is_always_free():
@pytest.mark.asyncio
async def test_positive_id_byok_missing_returns_free_with_empty_base_model():
"""If the BYOK config row is missing/deleted but the search space still
points at it, the resolver still returns free (no debit) with an empty
base_model billable_call's premium path is skipped, no harm done."""
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=99, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=99, user_id=user_id)])
owner, tier, base_model = await _resolve_agent_billing_for_search_space(
session, search_space_id=42
@ -419,18 +251,18 @@ async def test_positive_id_byok_missing_returns_free_with_empty_base_model():
async def test_search_space_not_found_raises_value_error():
from app.services.billable_calls import _resolve_agent_billing_for_search_space
session = _FakeSession([None])
with pytest.raises(ValueError, match="Search space"):
await _resolve_agent_billing_for_search_space(session, search_space_id=999)
await _resolve_agent_billing_for_search_space(
_FakeSession([None]), search_space_id=999
)
@pytest.mark.asyncio
async def test_agent_llm_id_none_raises_value_error():
async def test_chat_model_id_none_raises_value_error():
from app.services.billable_calls import _resolve_agent_billing_for_search_space
user_id = uuid4()
session = _FakeSession([_make_search_space(agent_llm_id=None, user_id=user_id)])
session = _FakeSession([_make_search_space(chat_model_id=None, user_id=user_id)])
with pytest.raises(ValueError, match="agent_llm_id"):
with pytest.raises(ValueError, match="chat_model_id"):
await _resolve_agent_billing_for_search_space(session, search_space_id=42)

View file

@ -17,8 +17,39 @@ from app.services.auto_model_pin_service import (
pytestmark = pytest.mark.unit
class _FakeRedis:
    """In-memory stand-in for the Redis client calls used by these tests.

    Only ``set``, ``mget``, ``delete`` and ``scan_iter`` are implemented.
    Stored values and their TTLs live in the plain ``values`` / ``ttls``
    dicts so a test can inspect them directly.
    """

    def __init__(self):
        self.values: dict[str, str] = {}
        self.ttls: dict[str, int] = {}

    def set(self, key: str, value: str, *, ex: int | None = None):
        """Store ``value`` under ``key`` and return ``True``.

        ``ex`` is recorded as the key's TTL. A write without ``ex``
        discards any TTL recorded earlier, as a Redis ``SET`` does, so
        the fake never reports an expiry the real store would not have.
        """
        self.values[key] = value
        if ex is None:
            self.ttls.pop(key, None)
        else:
            self.ttls[key] = ex
        return True

    def mget(self, keys: list[str]):
        """Return the stored value for each key, ``None`` where absent."""
        return [self.values.get(key) for key in keys]

    def delete(self, *keys: str):
        """Drop ``keys`` (value and TTL) and return how many held a value."""
        removed = 0
        for key in keys:
            if key in self.values:
                removed += 1
            self.values.pop(key, None)
            self.ttls.pop(key, None)
        return removed

    def scan_iter(self, pattern: str):
        """Yield stored keys that start with ``pattern`` minus a trailing ``*``.

        Only prefix patterns are supported; other glob syntax is not
        interpreted. Keys are snapshotted first so callers may delete
        while iterating.
        """
        prefix = pattern.removesuffix("*")
        return (key for key in list(self.values) if key.startswith(prefix))
@pytest.fixture(autouse=True)
def _clear_runtime_cooldown_map():
def _clear_runtime_cooldown_map(monkeypatch):
import app.services.auto_model_pin_service as svc
monkeypatch.setattr(svc, "_runtime_cooldown_redis", _FakeRedis())
clear_runtime_cooldown()
clear_healthy()
yield
@ -32,8 +63,9 @@ class _FakeQuotaResult:
class _FakeExecResult:
def __init__(self, thread):
def __init__(self, *, thread=None, scalars=None):
self._thread = thread
self._scalars = scalars or []
def unique(self):
return self
@ -41,19 +73,71 @@ class _FakeExecResult:
def scalar_one_or_none(self):
return self._thread
def scalars(self):
return SimpleNamespace(all=lambda: self._scalars)
class _FakeSession:
def __init__(self, thread):
def __init__(self, thread, *, models=None):
self.thread = thread
self.models = models or []
self.commit_count = 0
self.execute_count = 0
async def execute(self, _stmt):
return _FakeExecResult(self.thread)
self.execute_count += 1
if self.execute_count == 1:
return _FakeExecResult(thread=self.thread)
return _FakeExecResult(scalars=self.models)
async def commit(self):
self.commit_count += 1
def _set_global_llm_configs(monkeypatch, config, configs: list[dict]):
    """Install compact legacy cfg dicts as the global model catalog on ``config``.

    Each legacy entry is expanded into one connection record and one model
    record; the connection id is the config id shifted by -100_000. The
    original list, the connections and the models are then patched onto
    ``config`` as ``GLOBAL_LLM_CONFIGS``, ``GLOBAL_CONNECTIONS`` and
    ``GLOBAL_MODELS`` through ``monkeypatch.setattr``.
    """
    # Capability flags copied from the legacy entry, paired with the value
    # used when the entry does not specify the flag.
    capability_defaults = {
        "supports_chat": True,
        "supports_image_input": True,
        "supports_tools": True,
        "supports_image_generation": False,
    }

    connections: list[dict] = []
    models: list[dict] = []
    for legacy in configs:
        model_pk = int(legacy["id"])
        connection_pk = model_pk - 100_000
        # Legacy fixtures may use either key for the provider.
        provider = legacy.get("provider") or legacy.get("litellm_provider")
        model_name = legacy["model_name"]

        connections.append(
            {
                "id": connection_pk,
                "provider": provider,
                "scope": "GLOBAL",
                "enabled": True,
            }
        )

        model_entry = {
            "id": model_pk,
            "connection_id": connection_pk,
            "model_id": model_name,
            "display_name": legacy.get("name") or model_name,
        }
        for flag, fallback in capability_defaults.items():
            model_entry[flag] = legacy.get(flag, fallback)
        model_entry["capabilities_override"] = (
            legacy.get("capabilities_override") or {}
        )
        model_entry["billing_tier"] = legacy.get("billing_tier", "free")
        model_entry["catalog"] = {
            "auto_pin_tier": legacy.get("auto_pin_tier"),
            "quality_score": legacy.get("quality_score")
            or legacy.get("quality_score_static"),
        }
        models.append(model_entry)

    patches = (
        ("GLOBAL_LLM_CONFIGS", configs),
        ("GLOBAL_CONNECTIONS", connections),
        ("GLOBAL_MODELS", models),
    )
    for attr_name, value in patches:
        monkeypatch.setattr(config, attr_name, value)
def _thread(
*,
search_space_id: int = 10,
@ -71,14 +155,19 @@ async def test_auto_first_turn_pins_one_model(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
{
"id": -2,
"litellm_provider": "openai",
"model_name": "gpt-free",
"api_key": "k1",
},
{
"id": -1,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-prem",
"api_key": "k2",
"billing_tier": "premium",
@ -111,13 +200,13 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -2,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-free",
"api_key": "k1",
"billing_tier": "free",
@ -125,7 +214,7 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
},
{
"id": -1,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-prem",
"api_key": "k2",
"billing_tier": "premium",
@ -154,17 +243,19 @@ async def test_premium_eligible_auto_prefers_premium_over_free(monkeypatch):
@pytest.mark.asyncio
async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
async def test_premium_eligible_auto_uses_quality_pool_not_single_preferred_model(
monkeypatch,
):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5.1",
"api_key": "k1",
"billing_tier": "premium",
@ -173,7 +264,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
},
{
"id": -2,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5.4",
"api_key": "k2",
"billing_tier": "premium",
@ -182,12 +273,39 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
},
{
"id": -3,
"provider": "OPENROUTER",
"model_name": "openai/gpt-5.4",
"litellm_provider": "anthropic",
"model_name": "claude-opus",
"api_key": "k3",
"billing_tier": "premium",
"auto_pin_tier": "B",
"quality_score": 100,
"auto_pin_tier": "A",
"quality_score": 99,
},
{
"id": -4,
"litellm_provider": "openai",
"model_name": "gpt-5.3",
"api_key": "k4",
"billing_tier": "premium",
"auto_pin_tier": "A",
"quality_score": 98,
},
{
"id": -5,
"litellm_provider": "gemini",
"model_name": "gemini-3-pro",
"api_key": "k5",
"billing_tier": "premium",
"auto_pin_tier": "A",
"quality_score": 97,
},
{
"id": -6,
"litellm_provider": "xai",
"model_name": "grok-5",
"api_key": "k6",
"billing_tier": "premium",
"auto_pin_tier": "A",
"quality_score": 96,
},
],
)
@ -207,7 +325,7 @@ async def test_premium_eligible_auto_prefers_azure_gpt_5_4(monkeypatch):
user_id="00000000-0000-0000-0000-000000000001",
selected_llm_config_id=0,
)
assert result.resolved_llm_config_id == -2
assert result.resolved_llm_config_id in {-1, -3, -4, -5, -6}
assert result.resolved_tier == "premium"
@ -216,13 +334,13 @@ async def test_next_turn_reuses_existing_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-prem",
"api_key": "k2",
"billing_tier": "premium",
@ -257,13 +375,13 @@ async def test_premium_eligible_auto_can_pin_premium(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-prem",
"api_key": "k2",
"billing_tier": "premium",
@ -295,20 +413,20 @@ async def test_premium_ineligible_auto_pins_free_only(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -2,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-free",
"api_key": "k1",
"billing_tier": "free",
},
{
"id": -1,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-prem",
"api_key": "k2",
"billing_tier": "premium",
@ -340,20 +458,20 @@ async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -2,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-free",
"api_key": "k1",
"billing_tier": "free",
},
{
"id": -1,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-prem",
"api_key": "k2",
"billing_tier": "premium",
@ -385,20 +503,20 @@ async def test_force_repin_free_switches_auto_premium_pin_to_free(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -2,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-free",
"api_key": "k1",
"billing_tier": "free",
},
{
"id": -1,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-prem",
"api_key": "k2",
"billing_tier": "premium",
@ -433,11 +551,16 @@ async def test_explicit_user_model_change_clears_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-2))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
{
"id": -2,
"litellm_provider": "openai",
"model_name": "gpt-free",
"api_key": "k1",
},
],
)
@ -458,11 +581,16 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-999))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"},
{
"id": -2,
"litellm_provider": "openai",
"model_name": "gpt-free",
"api_key": "k1",
},
],
)
@ -487,7 +615,7 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
# ---------------------------------------------------------------------------
# Quality-aware pin selection (Auto Fastest upgrade)
# Quality-aware pin selection (Auto upgrade)
# ---------------------------------------------------------------------------
@ -498,13 +626,13 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "venice/dead-model",
"api_key": "k1",
"billing_tier": "free",
@ -514,7 +642,7 @@ async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
},
{
"id": -2,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "google/gemini-flash",
"api_key": "k1",
"billing_tier": "free",
@ -550,13 +678,13 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5",
"api_key": "k-yaml",
"billing_tier": "premium",
@ -566,7 +694,7 @@ async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
},
{
"id": -2,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "openai/gpt-5",
"api_key": "k-or",
"billing_tier": "premium",
@ -602,13 +730,13 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5",
"api_key": "k-yaml",
"billing_tier": "premium",
@ -618,7 +746,7 @@ async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch
},
{
"id": -2,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "google/gemini-flash:free",
"api_key": "k-or",
"billing_tier": "free",
@ -656,7 +784,7 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
high_score_cfgs = [
{
"id": -i,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": f"gpt-x-{i}",
"api_key": "k",
"billing_tier": "premium",
@ -668,7 +796,7 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
]
low_score_trap = {
"id": -99,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "tiny-legacy",
"api_key": "k",
"billing_tier": "premium",
@ -676,9 +804,9 @@ async def test_top_k_picks_only_high_score_models(monkeypatch):
"quality_score": 10,
"health_gated": False,
}
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[*high_score_cfgs, low_score_trap],
)
@ -723,13 +851,13 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "venice/dead-model",
"api_key": "k",
"billing_tier": "premium",
@ -739,7 +867,7 @@ async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
},
{
"id": -2,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5",
"api_key": "k",
"billing_tier": "premium",
@ -775,13 +903,13 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5",
"api_key": "k",
"billing_tier": "premium",
@ -791,7 +919,7 @@ async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
},
{
"id": -2,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5-pro",
"api_key": "k",
"billing_tier": "premium",
@ -833,13 +961,13 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "google/gemma-4-26b-a4b-it:free",
"api_key": "k",
"billing_tier": "free",
@ -849,7 +977,7 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
},
{
"id": -2,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "google/gemini-2.5-flash:free",
"api_key": "k",
"billing_tier": "free",
@ -881,18 +1009,86 @@ async def test_runtime_cooled_down_pin_is_not_reused(monkeypatch):
assert result.from_existing_pin is False
def test_mark_runtime_cooldown_writes_shared_redis(monkeypatch):
    """Marking a cooldown records its reason and TTL under the config's key.

    The assertions read the ``values`` / ``ttls`` mappings exposed by
    ``svc._runtime_cooldown_redis`` (presumably a fake client installed by a
    fixture elsewhere in this module — confirm).
    """
    import app.services.auto_model_pin_service as svc

    mark_runtime_cooldown(-9, reason="provider_rate_limited", cooldown_seconds=123)

    shared_store = svc._runtime_cooldown_redis
    cooldown_key = "auto:cooldown:llm:-9"
    assert shared_store.values[cooldown_key] == "provider_rate_limited"
    assert shared_store.ttls[cooldown_key] == 123
@pytest.mark.asyncio
async def test_shared_runtime_cooldown_blocks_pin_across_workers(monkeypatch):
    """A cooldown key already present in shared Redis must void the thread's pin.

    The thread starts pinned to config ``-1``; a cooldown entry for that
    config is written straight into the shared store (as another worker
    would), so resolution is expected to land on config ``-2`` instead of
    reusing the pin.
    """
    import app.services.auto_model_pin_service as svc
    from app.config import config

    def _free_openrouter_cfg(config_id, model_name, quality_score):
        # Both candidates differ only by id, model and score.
        return {
            "id": config_id,
            "litellm_provider": "openrouter",
            "model_name": model_name,
            "api_key": "k",
            "billing_tier": "free",
            "auto_pin_tier": "C",
            "quality_score": quality_score,
            "health_gated": False,
        }

    session = _FakeSession(_thread(pinned_llm_config_id=-1))
    candidates = [
        _free_openrouter_cfg(-1, "google/gemma-4-26b-a4b-it:free", 90),
        _free_openrouter_cfg(-2, "google/gemini-2.5-flash:free", 80),
    ]
    _set_global_llm_configs(monkeypatch, config, candidates)

    # Cooldown for the pinned config, written directly to the shared store.
    svc._runtime_cooldown_redis.set(
        "auto:cooldown:llm:-1", "provider_rate_limited", ex=600
    )

    async def _quota_denied(*_args, **_kwargs):
        return _FakeQuotaResult(allowed=False)

    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
        _quota_denied,
    )

    outcome = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id="00000000-0000-0000-0000-000000000001",
        selected_llm_config_id=0,
    )

    assert outcome.resolved_llm_config_id == -2
    assert outcome.from_existing_pin is False
@pytest.mark.asyncio
async def test_clearing_runtime_cooldown_restores_pin_reuse(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "google/gemma-4-26b-a4b-it:free",
"api_key": "k",
"billing_tier": "free",
@ -931,13 +1127,13 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
from app.config import config
session = _FakeSession(_thread(pinned_llm_config_id=-1))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[
{
"id": -1,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "google/gemma-4-26b-a4b-it:free",
"api_key": "k",
"billing_tier": "free",
@ -947,7 +1143,7 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
},
{
"id": -2,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "google/gemini-2.5-flash:free",
"api_key": "k",
"billing_tier": "free",

View file

@ -45,8 +45,9 @@ class _FakeQuotaResult:
class _FakeExecResult:
def __init__(self, thread):
def __init__(self, *, thread=None, scalars=None):
self._thread = thread
self._scalars = scalars or []
def unique(self):
return self
@ -54,14 +55,21 @@ class _FakeExecResult:
def scalar_one_or_none(self):
return self._thread
def scalars(self):
return SimpleNamespace(all=lambda: self._scalars)
class _FakeSession:
def __init__(self, thread):
self.thread = thread
self.commit_count = 0
self.execute_count = 0
async def execute(self, _stmt):
return _FakeExecResult(self.thread)
self.execute_count += 1
if self.execute_count == 1:
return _FakeExecResult(thread=self.thread)
return _FakeExecResult(scalars=[])
async def commit(self):
self.commit_count += 1
@ -71,10 +79,64 @@ def _thread(*, pinned: int | None = None):
return SimpleNamespace(id=1, search_space_id=10, pinned_llm_config_id=pinned)
def _set_global_llm_configs(monkeypatch, config, configs: list[dict]):
    """Patch ``config`` with ``configs`` and the connection/model rows built from them.

    For every entry in ``configs`` one connection record and one model record
    are produced:

    * the connection id is the config id minus ``100_000``;
    * the provider comes from ``"provider"``, falling back to
      ``"litellm_provider"``;
    * when the entry has no ``"supports_image_input"`` key, the flag is
      computed with ``derive_supports_image_input`` and written back onto the
      entry itself, so the caller's dict is mutated.

    ``GLOBAL_LLM_CONFIGS``, ``GLOBAL_CONNECTIONS`` and ``GLOBAL_MODELS`` are
    then set on ``config`` through ``monkeypatch.setattr``.
    """
    from app.services.provider_capabilities import derive_supports_image_input

    global_connections = []
    global_models = []

    for entry in configs:
        model_pk = int(entry["id"])
        conn_pk = model_pk - 100_000
        provider = entry.get("provider") or entry.get("litellm_provider")
        model_name = entry["model_name"]

        if "supports_image_input" not in entry:
            params = entry.get("litellm_params") or {}
            base_model = None
            if isinstance(params, dict):
                base_model = params.get("base_model")
            # Stored on the entry so GLOBAL_LLM_CONFIGS carries the flag too.
            entry["supports_image_input"] = derive_supports_image_input(
                provider=provider,
                model_name=model_name,
                base_model=base_model,
                custom_provider=entry.get("custom_provider"),
            )

        global_connections.append(
            {
                "id": conn_pk,
                "provider": provider,
                "scope": "GLOBAL",
                "enabled": True,
            }
        )

        catalog = {
            "auto_pin_tier": entry.get("auto_pin_tier"),
            "quality_score": entry.get("quality_score"),
        }
        global_models.append(
            {
                "id": model_pk,
                "connection_id": conn_pk,
                "model_id": model_name,
                "display_name": entry.get("name") or model_name,
                "supports_chat": entry.get("supports_chat", True),
                "supports_tools": entry.get("supports_tools", True),
                "supports_image_generation": entry.get(
                    "supports_image_generation", False
                ),
                "capabilities_override": entry.get("capabilities_override") or {},
                "billing_tier": entry.get("billing_tier", "free"),
                "catalog": catalog,
                "supports_image_input": entry["supports_image_input"],
            }
        )

    patched_attrs = (
        ("GLOBAL_LLM_CONFIGS", configs),
        ("GLOBAL_CONNECTIONS", global_connections),
        ("GLOBAL_MODELS", global_models),
    )
    for attr_name, attr_value in patched_attrs:
        monkeypatch.setattr(config, attr_name, attr_value)
def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
return {
"id": id_,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": f"vision-{id_}",
"api_key": "k",
"billing_tier": tier,
@ -87,7 +149,7 @@ def _vision_cfg(id_: int, *, tier: str = "free", quality: int = 80) -> dict:
def _text_only_cfg(id_: int, *, tier: str = "free", quality: int = 90) -> dict:
return {
"id": id_,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": f"text-{id_}",
"api_key": "k",
"billing_tier": tier,
@ -108,11 +170,7 @@ async def test_image_turn_filters_out_text_only_candidates(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
config,
"GLOBAL_LLM_CONFIGS",
[_text_only_cfg(-1), _vision_cfg(-2)],
)
_set_global_llm_configs(monkeypatch, config, [_text_only_cfg(-1), _vision_cfg(-2)])
monkeypatch.setattr(
"app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
_premium_allowed,
@ -140,11 +198,7 @@ async def test_image_turn_force_repins_stale_text_only_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned=-1))
monkeypatch.setattr(
config,
"GLOBAL_LLM_CONFIGS",
[_text_only_cfg(-1), _vision_cfg(-2)],
)
_set_global_llm_configs(monkeypatch, config, [_text_only_cfg(-1), _vision_cfg(-2)])
monkeypatch.setattr(
"app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
_premium_allowed,
@ -172,9 +226,9 @@ async def test_image_turn_reuses_existing_vision_pin(monkeypatch):
from app.config import config
session = _FakeSession(_thread(pinned=-2))
monkeypatch.setattr(
_set_global_llm_configs(
monkeypatch,
config,
"GLOBAL_LLM_CONFIGS",
[_text_only_cfg(-1), _vision_cfg(-2), _vision_cfg(-3, quality=70)],
)
monkeypatch.setattr(
@ -203,10 +257,8 @@ async def test_image_turn_with_no_vision_candidates_raises(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
config,
"GLOBAL_LLM_CONFIGS",
[_text_only_cfg(-1), _text_only_cfg(-2)],
_set_global_llm_configs(
monkeypatch, config, [_text_only_cfg(-1), _text_only_cfg(-2)]
)
monkeypatch.setattr(
"app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
@ -231,11 +283,7 @@ async def test_non_image_turn_keeps_text_only_in_pool(monkeypatch):
from app.config import config
session = _FakeSession(_thread())
monkeypatch.setattr(
config,
"GLOBAL_LLM_CONFIGS",
[_text_only_cfg(-1)],
)
_set_global_llm_configs(monkeypatch, config, [_text_only_cfg(-1)])
monkeypatch.setattr(
"app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
_premium_allowed,
@ -261,7 +309,7 @@ async def test_image_turn_unannotated_cfg_resolves_via_helper(monkeypatch):
session = _FakeSession(_thread())
cfg_unannotated_vision = {
"id": -2,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-4o", # known vision model in LiteLLM map
"api_key": "k",
"billing_tier": "free",
@ -269,7 +317,7 @@ async def test_image_turn_unannotated_cfg_resolves_via_helper(monkeypatch):
"quality_score": 80,
# NOTE: no supports_image_input key
}
monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [cfg_unannotated_vision])
_set_global_llm_configs(monkeypatch, config, [cfg_unannotated_vision])
monkeypatch.setattr(
"app.services.auto_model_pin_service.TokenQuotaService.credit_get_usage",
_premium_allowed,

View file

@ -1,19 +1,4 @@
"""Defense-in-depth: image-gen call sites must not let an empty
``api_base`` fall through to LiteLLM's module-global ``litellm.api_base``.
The bug repro: an OpenRouter image-gen config ships
``api_base=""``. The pre-fix call site in
``image_generation_routes._execute_image_generation`` did
``if cfg.get("api_base"): kwargs["api_base"] = cfg["api_base"]`` which
silently dropped the empty string. LiteLLM then fell back to
``litellm.api_base`` (commonly inherited from ``AZURE_OPENAI_ENDPOINT``)
and OpenRouter's ``image_generation/transformation`` appended
``/chat/completions`` to it → 404 ``Resource not found``.
This test pins the post-fix behaviour: with an empty ``api_base`` in
the config, the call site MUST set ``api_base`` to OpenRouter's public
URL instead of leaving it unset.
"""
"""Image-gen call sites must pass each config's explicit ``api_base``."""
from __future__ import annotations
@ -26,22 +11,23 @@ pytestmark = pytest.mark.unit
@pytest.mark.asyncio
async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
"""The global-config branch (``config_id < 0``) of
``_execute_image_generation`` must apply the resolver and pin
``api_base`` to OpenRouter when the config ships an empty string.
"""
async def test_global_openrouter_image_gen_sets_explicit_api_base():
"""The global-config branch forwards the explicit OpenRouter base."""
from app.routes import image_generation_routes
cfg = {
global_model = {
"id": -20_001,
"name": "GPT Image 1 (OpenRouter)",
"provider": "OPENROUTER",
"model_name": "openai/gpt-image-1",
"connection_id": -101,
"model_id": "openai/gpt-image-1",
"supports_image_generation": True,
"capabilities_override": {},
}
global_connection = {
"id": -101,
"provider": "openrouter",
"api_key": "sk-or-test",
"api_base": "", # the original bug shape
"api_version": None,
"litellm_params": {},
"base_url": "https://openrouter.ai/api/v1",
"extra": {},
}
captured: dict = {}
@ -51,7 +37,7 @@ async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
return MagicMock(model_dump=lambda: {"data": []}, _hidden_params={})
image_gen = MagicMock()
image_gen.image_generation_config_id = cfg["id"]
image_gen.image_gen_model_id = global_model["id"]
image_gen.prompt = "test"
image_gen.n = 1
image_gen.quality = None
@ -61,14 +47,19 @@ async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
image_gen.model = None
search_space = MagicMock()
search_space.image_generation_config_id = cfg["id"]
search_space.image_gen_model_id = global_model["id"]
session = MagicMock()
with (
patch.object(
image_generation_routes,
"_get_global_image_gen_config",
return_value=cfg,
"_get_global_model",
return_value=global_model,
),
patch.object(
image_generation_routes,
"_get_global_connection",
return_value=global_connection,
),
patch.object(
image_generation_routes,
@ -80,30 +71,31 @@ async def test_global_openrouter_image_gen_sets_api_base_when_config_empty():
session=session, image_gen=image_gen, search_space=search_space
)
# The whole point of the fix: even with empty ``api_base`` in the
# config, we forward OpenRouter's public URL so the call doesn't
# inherit an Azure endpoint.
assert captured.get("api_base") == "https://openrouter.ai/api/v1"
assert captured["model"] == "openrouter/openai/gpt-image-1"
@pytest.mark.asyncio
async def test_generate_image_tool_global_sets_api_base_when_config_empty():
"""Same defense at the agent tool entry point — both surfaces share
async def test_generate_image_tool_global_sets_explicit_api_base():
"""Same explicit-base behavior at the agent tool entry point — both surfaces share
the same OpenRouter config payloads."""
from app.agents.chat.multi_agent_chat.subagents.builtins.deliverables.tools import (
generate_image as gi_module,
)
cfg = {
global_model = {
"id": -20_001,
"name": "GPT Image 1 (OpenRouter)",
"provider": "OPENROUTER",
"model_name": "openai/gpt-image-1",
"connection_id": -101,
"model_id": "openai/gpt-image-1",
"supports_image_generation": True,
"capabilities_override": {},
}
global_connection = {
"id": -101,
"provider": "openrouter",
"api_key": "sk-or-test",
"api_base": "",
"api_version": None,
"litellm_params": {},
"base_url": "https://openrouter.ai/api/v1",
"extra": {},
}
captured: dict = {}
@ -119,7 +111,7 @@ async def test_generate_image_tool_global_sets_api_base_when_config_empty():
search_space = MagicMock()
search_space.id = 1
search_space.image_generation_config_id = cfg["id"]
search_space.image_gen_model_id = global_model["id"]
session_cm = AsyncMock()
session = AsyncMock()
@ -142,7 +134,10 @@ async def test_generate_image_tool_global_sets_api_base_when_config_empty():
with (
patch.object(gi_module, "shielded_async_session", return_value=session_cm),
patch.object(gi_module, "_get_global_image_gen_config", return_value=cfg),
patch.object(gi_module, "_get_global_model", return_value=global_model),
patch.object(
gi_module, "_get_global_connection", return_value=global_connection
),
patch.object(
gi_module, "aimage_generation", side_effect=fake_aimage_generation
),
@ -171,20 +166,16 @@ async def test_generate_image_tool_global_sets_api_base_when_config_empty():
assert captured["model"] == "openrouter/openai/gpt-image-1"
def test_image_gen_router_deployment_sets_api_base_when_config_empty():
"""The Auto-mode router pool must also resolve ``api_base`` when an
OpenRouter config ships an empty string. The deployment dict is fed
straight to ``litellm.Router``, so a missing ``api_base`` would
leak the same way as the direct call sites.
"""
def test_image_gen_router_deployment_sets_explicit_api_base():
"""The Auto-mode router pool carries explicit api_base into deployments."""
from app.services.image_gen_router_service import ImageGenRouterService
deployment = ImageGenRouterService._config_to_deployment(
{
"model_name": "openai/gpt-image-1",
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"api_key": "sk-or-test",
"api_base": "",
"api_base": "https://openrouter.ai/api/v1",
}
)
assert deployment is not None

View file

@ -25,10 +25,10 @@ def _fake_yaml_config(
return {
"id": id,
"name": f"yaml-{id}",
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": model_name,
"api_key": "sk-test",
"api_base": "",
"api_base": "https://api.openai.com/v1",
"billing_tier": billing_tier,
"rpm": 100,
"tpm": 100_000,
@ -54,10 +54,10 @@ def _fake_openrouter_config(
return {
"id": id,
"name": f"or-{id}",
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": model_name,
"api_key": "sk-or-test",
"api_base": "",
"api_base": "https://openrouter.ai/api/v1",
"billing_tier": billing_tier,
"rpm": 20 if billing_tier == "free" else 200,
"tpm": 100_000 if billing_tier == "free" else 1_000_000,
@ -217,10 +217,64 @@ def test_auto_model_pin_candidates_include_dynamic_openrouter():
model_name="meta-llama/llama-3.3-70b:free",
billing_tier="free",
)
original = config.GLOBAL_LLM_CONFIGS
global_connections = [
{
"id": -110_001,
"provider": "openrouter",
"scope": "GLOBAL",
"enabled": True,
},
{
"id": -110_002,
"provider": "openrouter",
"scope": "GLOBAL",
"enabled": True,
},
]
global_models = [
{
"id": or_premium["id"],
"connection_id": -110_001,
"model_id": or_premium["model_name"],
"display_name": or_premium["name"],
"supports_chat": True,
"supports_image_input": True,
"supports_tools": True,
"supports_image_generation": False,
"capabilities_override": {},
"billing_tier": or_premium["billing_tier"],
"catalog": {
"auto_pin_tier": "A",
"quality_score": 50,
},
},
{
"id": or_free["id"],
"connection_id": -110_002,
"model_id": or_free["model_name"],
"display_name": or_free["name"],
"supports_chat": True,
"supports_image_input": True,
"supports_tools": True,
"supports_image_generation": False,
"capabilities_override": {},
"billing_tier": or_free["billing_tier"],
"catalog": {
"auto_pin_tier": "A",
"quality_score": 50,
},
},
]
original_configs = config.GLOBAL_LLM_CONFIGS
original_connections = config.GLOBAL_CONNECTIONS
original_models = config.GLOBAL_MODELS
try:
config.GLOBAL_LLM_CONFIGS = [or_premium, or_free]
config.GLOBAL_CONNECTIONS = global_connections
config.GLOBAL_MODELS = global_models
candidate_ids = {c["id"] for c in _global_candidates()}
assert candidate_ids == {-10_001, -10_002}
finally:
config.GLOBAL_LLM_CONFIGS = original
config.GLOBAL_LLM_CONFIGS = original_configs
config.GLOBAL_CONNECTIONS = original_connections
config.GLOBAL_MODELS = original_models

View file

@ -0,0 +1,78 @@
from app.services.global_model_catalog import materialize_global_model_catalog
from app.services.model_resolver import ensure_v1, to_litellm
def test_openai_compatible_resolver_uses_explicit_api_base() -> None:
    """An OpenAI-compatible connection yields its own base URL and API key."""
    local_base = "http://host.docker.internal:1234/v1"
    connection = {
        "protocol": "OPENAI_COMPATIBLE",
        "provider": "openai",
        "base_url": local_base,
        "api_key": "local-key",
        "extra": {},
    }

    litellm_model, call_kwargs = to_litellm(connection, "qwen/qwen3")

    assert litellm_model == "openai/qwen/qwen3"
    assert call_kwargs["api_base"] == local_base
    assert call_kwargs["api_key"] == "local-key"
    # A base URL that already ends in /v1 comes back unchanged.
    assert ensure_v1("http://example.com/v1") == "http://example.com/v1"
def test_ollama_resolver_uses_native_api_base() -> None:
    """An Ollama connection keeps its base URL as given and uses the ``ollama_chat/`` prefix."""
    ollama_base = "http://host.docker.internal:11434"
    connection = {
        "protocol": "OLLAMA",
        "provider": "ollama_chat",
        "base_url": ollama_base,
        "api_key": None,
        "extra": {},
    }

    litellm_model, call_kwargs = to_litellm(connection, "llama3.2")

    assert litellm_model == "ollama_chat/llama3.2"
    assert call_kwargs["api_base"] == ollama_base
def test_global_materialization_preserves_tier_and_keeps_key_server_side() -> None:
    """Two chat configs sharing provider, key and base collapse into one connection.

    Each model keeps its own billing tier and catalog metadata, and a copy of
    the connections with ``api_key`` removed contains no ``sk-`` secret.
    """
    free_cfg = {
        "id": -101,
        "name": "OpenRouter Free",
        "litellm_provider": "openrouter",
        "model_name": "meta-llama/llama-3.1-8b-instruct:free",
        "api_key": "sk-global-secret",
        "api_base": "https://openrouter.ai/api/v1",
        "billing_tier": "free",
        "anonymous_enabled": True,
        "seo_enabled": True,
        "rpm": 10,
        "tpm": 1000,
    }
    premium_cfg = {
        "id": -102,
        "name": "OpenRouter Premium",
        "litellm_provider": "openrouter",
        "model_name": "anthropic/claude-sonnet-4",
        "api_key": "sk-global-secret",
        "api_base": "https://openrouter.ai/api/v1",
        "billing_tier": "premium",
    }

    connections, models = materialize_global_model_catalog(
        chat_configs=[free_cfg, premium_cfg],
        image_configs=[],
    )

    assert len(connections) == 1
    assert connections[0]["api_key"] == "sk-global-secret"

    tiers = {model["billing_tier"] for model in models}
    assert tiers == {"free", "premium"}

    first_catalog = models[0]["catalog"]
    assert first_catalog["anonymous_enabled"] is True
    assert first_catalog["rpm"] == 10

    # Dropping the api_key field must be enough to remove every secret.
    redacted = []
    for connection in connections:
        redacted.append(
            {key: value for key, value in connection.items() if key != "api_key"}
        )
    assert "sk-" not in repr(redacted)

View file

@ -217,7 +217,7 @@ def test_generate_configs_drops_non_text_and_non_tool_models():
# ---------------------------------------------------------------------------
# _generate_image_gen_configs / _generate_vision_llm_configs
# _generate_image_gen_configs
# ---------------------------------------------------------------------------
@ -263,18 +263,15 @@ def test_generate_image_gen_configs_filters_by_image_output():
# Each config must carry ``billing_tier`` for routing in image_generation_routes.
for c in cfgs:
assert c["billing_tier"] in {"free", "premium"}
assert c["provider"] == "OPENROUTER"
assert c["provider"] == "openrouter"
assert c[_OPENROUTER_DYNAMIC_MARKER] is True
# Defense-in-depth: emit the OpenRouter base URL at source so a
# downstream call site that forgets ``resolve_api_base`` still
# doesn't 404 against an inherited Azure endpoint.
# Emit the OpenRouter base URL at source so every call path passes an
# explicit api_base and cannot inherit a process-global endpoint.
assert c["api_base"] == "https://openrouter.ai/api/v1"
def test_generate_image_gen_configs_assigns_image_id_offset():
"""Image configs use a different id_offset (-20000) so their negative
IDs don't collide with chat configs (-10000) or vision configs (-30000).
"""
"""Image configs use their own id_offset (-20000)."""
from app.services.openrouter_integration_service import (
_generate_image_gen_configs,
)
@ -291,90 +288,3 @@ def test_generate_image_gen_configs_assigns_image_id_offset():
cfgs = _generate_image_gen_configs(raw, dict(_SETTINGS_BASE))
assert all(c["id"] < -20_000 + 1 for c in cfgs)
assert all(c["id"] > -29_000_000 for c in cfgs)
def test_generate_vision_llm_configs_filters_by_image_input_text_output():
    """Only models that take image input and produce text output are emitted.

    Pure image generators (no text output) and text-only models (no image
    input) must be absent from the vision config list.
    """
    from app.services.openrouter_integration_service import (
        _generate_vision_llm_configs,
    )

    def _catalog_entry(model_id, inputs, outputs, context_length, prompt, completion):
        return {
            "id": model_id,
            "architecture": {
                "input_modalities": inputs,
                "output_modalities": outputs,
            },
            "context_length": context_length,
            "pricing": {"prompt": prompt, "completion": completion},
        }

    catalog = [
        # Image in, text out: the only entry expected to be emitted.
        _catalog_entry(
            "openai/gpt-4o", ["text", "image"], ["text"], 128_000,
            "0.000005", "0.000015",
        ),
        # Image out only, no text output: must be filtered.
        _catalog_entry("openai/gpt-image-1", ["text"], ["image"], 4_000, "0", "0"),
        # Text only, no image input: must be filtered.
        _catalog_entry(
            "anthropic/claude-3-haiku", ["text"], ["text"], 200_000,
            "0.000001", "0.000005",
        ),
    ]

    emitted = _generate_vision_llm_configs(catalog, dict(_SETTINGS_BASE))

    assert {entry["model_name"] for entry in emitted} == {"openai/gpt-4o"}

    vision_cfg = emitted[0]
    assert vision_cfg["billing_tier"] == "premium"
    # Token prices travel with the config so pricing registration can use
    # them for ``openrouter/openai/gpt-4o`` without the chat catalogue cache.
    assert vision_cfg["input_cost_per_token"] == pytest.approx(5e-6)
    assert vision_cfg["output_cost_per_token"] == pytest.approx(15e-6)
    assert vision_cfg[_OPENROUTER_DYNAMIC_MARKER] is True
    # The OpenRouter base URL is set at the source, so a call site that
    # skips ``resolve_api_base`` still does not inherit an Azure endpoint.
    assert vision_cfg["api_base"] == "https://openrouter.ai/api/v1"
def test_generate_vision_llm_configs_drops_chat_only_filters():
    """Chat-only filters must not be applied when emitting vision configs.

    A vision model with a small context window and no advertised tool calling
    is still usable for "describe this image" prompts, so neither
    ``supports_tool_calling`` nor ``has_sufficient_context`` may exclude it.
    """
    from app.services.openrouter_integration_service import (
        _generate_vision_llm_configs,
    )

    tiny_vision_model = {
        "id": "tiny/vision-mini",
        "architecture": {
            "input_modalities": ["text", "image"],
            "output_modalities": ["text"],
        },
        "supported_parameters": [],  # advertises no tool support
        "context_length": 4_000,  # far under MIN_CONTEXT_LENGTH
        "pricing": {"prompt": "0.0000001", "completion": "0.0000005"},
    }

    emitted = _generate_vision_llm_configs([tiny_vision_model], dict(_SETTINGS_BASE))

    assert len(emitted) == 1
    assert emitted[0]["model_name"] == "tiny/vision-mini"

View file

@ -25,7 +25,7 @@ def _or_cfg(
) -> dict:
return {
"id": cid,
"provider": "OPENROUTER",
"provider": "openrouter",
"model_name": model_name,
"billing_tier": tier,
"auto_pin_tier": "B" if tier == "premium" else "C",
@ -144,7 +144,7 @@ async def test_enrich_health_only_touches_or_provider(monkeypatch):
"""YAML cfgs that aren't OPENROUTER must be skipped entirely."""
yaml_cfg = {
"id": -1,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5",
"billing_tier": "premium",
"auto_pin_tier": "A",
@ -313,7 +313,7 @@ async def test_enrich_health_no_or_cfgs_is_noop(monkeypatch):
"""When the catalogue has no OR cfgs at all, no HTTP calls fire."""
yaml_cfg: dict[str, Any] = {
"id": -1,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5",
"billing_tier": "premium",
}

View file

@ -186,7 +186,7 @@ def test_openrouter_models_register_under_aliases(monkeypatch):
[
{
"id": 1,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "anthropic/claude-3-5-sonnet",
}
],
@ -228,7 +228,7 @@ def test_yaml_override_registers_under_alias_set(monkeypatch):
[
{
"id": 1,
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5.4",
"litellm_params": {
"base_model": "gpt-5.4",
@ -243,7 +243,6 @@ def test_yaml_override_registers_under_alias_set(monkeypatch):
keys = spy.all_keys
assert "gpt-5.4" in keys
assert "azure_openai/gpt-5.4" in keys
assert "azure/gpt-5.4" in keys
payload = spy.calls[0]
@ -271,7 +270,7 @@ def test_no_override_means_no_registration(monkeypatch):
[
{
"id": 1,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "gpt-4o",
"litellm_params": {"base_model": "gpt-4o"},
}
@ -302,7 +301,7 @@ def test_openrouter_skipped_when_pricing_missing(monkeypatch):
[
{
"id": 1,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "anthropic/claude-3-5-sonnet",
}
],
@ -349,12 +348,12 @@ def test_register_continues_after_individual_failure(monkeypatch, caplog):
[
{
"id": 1,
"provider": "OPENROUTER",
"litellm_provider": "openrouter",
"model_name": "anthropic/claude-3-5-sonnet",
},
{
"id": 2,
"provider": "OPENAI",
"litellm_provider": "openai",
"model_name": "custom-deployment",
"litellm_params": {
"base_model": "custom-deployment",
@ -369,79 +368,3 @@ def test_register_continues_after_individual_failure(monkeypatch, caplog):
# The good config still registered.
assert any("custom-deployment" in payload for payload in successful_calls)
def test_vision_configs_registered_with_chat_shape(monkeypatch):
    """Vision configs get chat-shaped token pricing registered for them.

    ``register_pricing_from_global_configs`` iterates
    ``GLOBAL_VISION_LLM_CONFIGS`` as well as the chat configs, so vision
    calls made during indexing are billed. Image-generation pricing is
    deliberately not registered by this path (LiteLLM's ``response_cost``
    covers it).
    """
    from app.config import config
    from app.services.pricing_registration import register_pricing_from_global_configs

    spy = _patch_register(monkeypatch)
    _patch_openrouter_pricing(
        monkeypatch,
        {"openai/gpt-4o": {"prompt": "0.000005", "completion": "0.000015"}},
    )

    vision_only = [
        {
            "id": -1,
            "provider": "OPENROUTER",
            "model_name": "openai/gpt-4o",
            "billing_tier": "premium",
            "input_cost_per_token": 5e-6,
            "output_cost_per_token": 15e-6,
        }
    ]
    # The chat list is left empty to show that the vision list is walked on
    # its own rather than as a side effect of the chat walk.
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [])
    monkeypatch.setattr(config, "GLOBAL_VISION_LLM_CONFIGS", vision_only)

    register_pricing_from_global_configs()

    alias = "openrouter/openai/gpt-4o"
    assert alias in spy.all_keys

    registered = spy.calls[0][alias]
    assert registered["mode"] == "chat"
    assert registered["litellm_provider"] == "openrouter"
    assert registered["input_cost_per_token"] == pytest.approx(5e-6)
    assert registered["output_cost_per_token"] == pytest.approx(15e-6)
def test_vision_with_inline_pricing_when_or_cache_missing(monkeypatch):
    """Inline per-token prices are used when the OpenRouter cache has no entry.

    With an empty pricing cache, the vision walk must still register the
    model using the ``input_cost_per_token`` / ``output_cost_per_token``
    values carried on the config itself.
    """
    from app.config import config
    from app.services.pricing_registration import register_pricing_from_global_configs

    spy = _patch_register(monkeypatch)
    _patch_openrouter_pricing(monkeypatch, {})  # cache knows no models

    inline_priced_cfg = {
        "id": -1,
        "provider": "OPENROUTER",
        "model_name": "google/gemini-2.5-flash",
        "billing_tier": "premium",
        "input_cost_per_token": 1e-6,
        "output_cost_per_token": 4e-6,
    }
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [])
    monkeypatch.setattr(config, "GLOBAL_VISION_LLM_CONFIGS", [inline_priced_cfg])

    register_pricing_from_global_configs()

    assert "openrouter/google/gemini-2.5-flash" in spy.all_keys

View file

@ -1,107 +0,0 @@
"""Unit tests for the shared ``api_base`` resolver.
The cascade exists so vision and image-gen call sites can't silently
inherit ``litellm.api_base`` (commonly set by ``AZURE_OPENAI_ENDPOINT``)
when an OpenRouter / Groq / etc. config ships an empty string. See
``provider_api_base`` module docstring for the original repro
(OpenRouter image-gen 404-ing against an Azure endpoint).
"""
from __future__ import annotations
import pytest
from app.services.provider_api_base import (
PROVIDER_DEFAULT_API_BASE,
PROVIDER_KEY_DEFAULT_API_BASE,
resolve_api_base,
)
pytestmark = pytest.mark.unit
def test_config_value_wins_over_defaults():
    """A non-empty configured base URL is returned verbatim.

    This holds even for a provider that has a built-in default — the
    operator's value gets the last word.
    """
    operator_url = "https://my-openrouter-mirror.example.com/v1"

    resolved = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base=operator_url,
    )

    assert resolved == "https://my-openrouter-mirror.example.com/v1"
def test_provider_key_default_when_config_missing():
    """``DEEPSEEK`` shares the ``openai`` LiteLLM prefix but has its own
    base URL — the provider-key map must take precedence over the prefix
    map so DeepSeek requests don't go to OpenAI."""
    resolved = resolve_api_base(
        provider="DEEPSEEK", provider_prefix="openai", config_api_base=None
    )
    deepseek_default = PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]
    assert resolved == deepseek_default
def test_provider_prefix_default_when_no_key_default():
    """With no config value, ``OPENROUTER`` must resolve to the
    ``openrouter`` entry of the prefix-default map."""
    resolved = resolve_api_base(
        provider="OPENROUTER", provider_prefix="openrouter", config_api_base=None
    )
    prefix_default = PROVIDER_DEFAULT_API_BASE["openrouter"]
    assert resolved == prefix_default
def test_unknown_provider_returns_none():
    """When neither map matches we return ``None`` so the caller can let
    LiteLLM apply its own provider-integration default (Azure deployment
    URL, custom-provider URL, etc.)."""
    unknown = "SOMETHING_NEW"
    resolved = resolve_api_base(
        provider=unknown,
        provider_prefix="something_new",
        config_api_base=None,
    )
    assert resolved is None
def test_empty_string_config_treated_as_missing():
    """The original bug: OpenRouter dynamic configs ship ``api_base=""``
    and downstream call sites use ``if cfg.get("api_base"):`` — empty
    strings are falsy in Python but the cascade has to step in anyway."""
    empty_base = ""
    resolved = resolve_api_base(
        provider="OPENROUTER",
        provider_prefix="openrouter",
        config_api_base=empty_base,
    )
    assert resolved == PROVIDER_DEFAULT_API_BASE["openrouter"]
def test_whitespace_only_config_treated_as_missing():
    """A whitespace-only config value is a configuration mistake — treat
    it as missing instead of forwarding whitespace to LiteLLM (which
    would almost certainly 404)."""
    call_kwargs = {
        "provider": "OPENROUTER",
        "provider_prefix": "openrouter",
        "config_api_base": " ",
    }
    resolved = resolve_api_base(**call_kwargs)
    assert resolved == PROVIDER_DEFAULT_API_BASE["openrouter"]
def test_provider_case_insensitive():
    """Some call sites pass the provider lowercase (DB enum value), others
    uppercase (YAML key). Both must resolve."""
    # Same lookup, once per spelling; uppercase first to keep call order.
    upper, lower = (
        resolve_api_base(
            provider=spelling, provider_prefix="openai", config_api_base=None
        )
        for spelling in ("DEEPSEEK", "deepseek")
    )
    assert upper == lower == PROVIDER_KEY_DEFAULT_API_BASE["DEEPSEEK"]
def test_all_inputs_none_returns_none():
    """With every input ``None`` the resolver must return ``None``."""
    resolved = resolve_api_base(
        provider=None, provider_prefix=None, config_api_base=None
    )
    assert resolved is None

View file

@ -32,7 +32,7 @@ pytestmark = pytest.mark.unit
def test_or_modalities_with_image_returns_true():
assert (
derive_supports_image_input(
provider="OPENROUTER",
provider="openrouter",
model_name="openai/gpt-4o",
openrouter_input_modalities=["text", "image"],
)
@ -43,7 +43,7 @@ def test_or_modalities_with_image_returns_true():
def test_or_modalities_text_only_returns_false():
assert (
derive_supports_image_input(
provider="OPENROUTER",
provider="openrouter",
model_name="deepseek/deepseek-v3.2-exp",
openrouter_input_modalities=["text"],
)
@ -57,7 +57,7 @@ def test_or_modalities_empty_list_returns_false():
to LiteLLM."""
assert (
derive_supports_image_input(
provider="OPENROUTER",
provider="openrouter",
model_name="weird/empty-modalities",
openrouter_input_modalities=[],
)
@ -70,7 +70,7 @@ def test_or_modalities_none_falls_through_to_litellm():
to LiteLLM. Using ``openai/gpt-4o`` which is in LiteLLM's map."""
assert (
derive_supports_image_input(
provider="OPENAI",
provider="openai",
model_name="gpt-4o",
openrouter_input_modalities=None,
)
@ -86,7 +86,7 @@ def test_or_modalities_none_falls_through_to_litellm():
def test_litellm_known_vision_model_returns_true():
assert (
derive_supports_image_input(
provider="OPENAI",
provider="openai",
model_name="gpt-4o",
)
is True
@ -100,7 +100,7 @@ def test_litellm_base_model_wins_over_model_name():
doesn't know) would shadow the real capability."""
assert (
derive_supports_image_input(
provider="AZURE_OPENAI",
provider="azure",
model_name="my-azure-deployment-id",
base_model="gpt-4o",
)
@ -112,7 +112,7 @@ def test_litellm_unknown_model_default_allows():
"""Default-allow on unknown — the safety net is the actual block."""
assert (
derive_supports_image_input(
provider="CUSTOM",
provider="custom",
model_name="brand-new-model-x9-unmapped",
custom_provider="brand_new_proxy",
)
@ -128,7 +128,7 @@ def test_litellm_known_text_only_returns_false():
# Sanity: confirm the helper's negative path. We use a small model
# known not to support vision per the map.
result = derive_supports_image_input(
provider="DEEPSEEK",
provider="openai",
model_name="deepseek-chat",
)
# We accept either False (LiteLLM said explicit no) or True
@ -147,7 +147,7 @@ def test_litellm_known_text_only_returns_false():
def test_is_known_text_only_returns_false_for_vision_model():
assert (
is_known_text_only_chat_model(
provider="OPENAI",
provider="openai",
model_name="gpt-4o",
)
is False
@ -160,7 +160,7 @@ def test_is_known_text_only_returns_false_for_unknown_model():
fixing."""
assert (
is_known_text_only_chat_model(
provider="CUSTOM",
provider="custom",
model_name="brand-new-model-x9-unmapped",
custom_provider="brand_new_proxy",
)
@ -181,7 +181,7 @@ def test_is_known_text_only_returns_false_when_lookup_raises(monkeypatch):
assert (
is_known_text_only_chat_model(
provider="OPENAI",
provider="openai",
model_name="gpt-4o",
)
is False
@ -201,7 +201,7 @@ def test_is_known_text_only_returns_true_on_explicit_false(monkeypatch):
assert (
is_known_text_only_chat_model(
provider="OPENAI",
provider="openai",
model_name="any-model",
)
is True
@ -218,7 +218,7 @@ def test_is_known_text_only_returns_false_on_supports_vision_true(monkeypatch):
assert (
is_known_text_only_chat_model(
provider="OPENAI",
provider="openai",
model_name="any-model",
)
is False
@ -237,7 +237,7 @@ def test_is_known_text_only_returns_false_on_missing_key(monkeypatch):
assert (
is_known_text_only_chat_model(
provider="OPENAI",
provider="openai",
model_name="any-model",
)
is False

View file

@ -1,4 +1,4 @@
"""Unit tests for the Auto (Fastest) quality scoring module."""
"""Unit tests for the Auto quality scoring module."""
from __future__ import annotations
@ -228,7 +228,7 @@ def test_static_score_or_recent_release_beats_year_old_same_provider():
def test_static_score_yaml_includes_operator_bonus():
cfg = {
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5",
"litellm_params": {"base_model": "azure/gpt-5"},
}
@ -238,7 +238,7 @@ def test_static_score_yaml_includes_operator_bonus():
def test_static_score_yaml_unknown_provider_still_carries_bonus():
cfg = {
"provider": "SOME_NEW_PROVIDER",
"litellm_provider": "some_new_provider",
"model_name": "weird-model",
}
score = static_score_yaml(cfg)
@ -247,7 +247,7 @@ def test_static_score_yaml_unknown_provider_still_carries_bonus():
def test_static_score_yaml_clamped_0_to_100():
cfg = {
"provider": "AZURE_OPENAI",
"litellm_provider": "azure",
"model_name": "gpt-5",
"litellm_params": {"base_model": "azure/gpt-5"},
}

View file

@ -131,6 +131,10 @@ def test_serialized_calls_includes_cost_micros():
assert serialized == [
{
"model": "m",
"model_ref": None,
"model_id": None,
"display_name": None,
"provider": None,
"prompt_tokens": 1,
"completion_tokens": 1,
"total_tokens": 2,

View file

@ -1,89 +0,0 @@
"""Defense-in-depth: vision-LLM resolution must not leak ``api_base``
defaults from ``litellm.api_base`` either.
Vision shares the same shape as image-gen — global YAML / OpenRouter
dynamic configs ship ``api_base=""`` and the pre-fix ``get_vision_llm``
call sites would silently drop the empty string and inherit
``AZURE_OPENAI_ENDPOINT``. ``ChatLiteLLM(...)`` doesn't 404 on
construction so we test the kwargs we hand to it instead.
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
pytestmark = pytest.mark.unit
@pytest.mark.asyncio
async def test_get_vision_llm_global_openrouter_sets_api_base():
    """Global negative-ID branch: an OpenRouter vision config with
    ``api_base=""`` must end up calling ``SanitizedChatLiteLLM`` with
    ``api_base="https://openrouter.ai/api/v1"`` — never an empty string,
    never silently absent."""
    from app.services import llm_service

    cfg = {
        "id": -30_001,
        "name": "GPT-4o Vision (OpenRouter)",
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-4o",
        "api_key": "sk-or-test",
        "api_base": "",
        "api_version": None,
        "litellm_params": {},
        "billing_tier": "free",
    }

    # Search space whose vision config id points at the global cfg above.
    search_space = MagicMock(
        id=1,
        user_id="user-x",
        vision_llm_config_id=cfg["id"],
    )

    # session.execute(...) -> result.scalars().first() -> search_space
    query_result = MagicMock()
    query_result.scalars.return_value.first.return_value = search_space
    session = AsyncMock()
    session.execute.return_value = query_result

    seen_kwargs: dict = {}

    class _RecordingLLM:
        """Stand-in for the LLM class that records constructor kwargs."""

        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    with (
        patch(
            "app.services.vision_llm_router_service.get_global_vision_llm_config",
            return_value=cfg,
        ),
        patch(
            "app.agents.chat.runtime.llm_config.SanitizedChatLiteLLM",
            new=_RecordingLLM,
        ),
    ):
        await llm_service.get_vision_llm(session=session, search_space_id=1)

    assert seen_kwargs.get("api_base") == "https://openrouter.ai/api/v1"
    assert seen_kwargs["model"] == "openrouter/openai/gpt-4o"
def test_vision_router_deployment_sets_api_base_when_config_empty():
    """Auto-mode vision router: deployments are fed to ``litellm.Router``,
    so the resolver has to apply at deployment construction time too."""
    from app.services.vision_llm_router_service import VisionLLMRouterService

    # OpenRouter config that ships an empty ``api_base``.
    router_cfg = {
        "model_name": "openai/gpt-4o",
        "provider": "OPENROUTER",
        "api_key": "sk-or-test",
        "api_base": "",
    }
    deployment = VisionLLMRouterService._config_to_deployment(router_cfg)

    assert deployment is not None
    params = deployment["litellm_params"]
    assert params["api_base"] == "https://openrouter.ai/api/v1"
    assert params["model"] == "openrouter/openai/gpt-4o"

View file

@ -0,0 +1,79 @@
from __future__ import annotations
import pytest
from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception
from app.tasks.chat.streaming.errors.classifier import classify_stream_exception
pytestmark = pytest.mark.unit
def _exception_named(name: str, message: str) -> Exception:
return type(name, (Exception,), {})(message)
def test_adapter_classifies_authentication_error_by_class_name() -> None:
    """An exception whose class is named ``AuthenticationError`` must be
    adapted to a non-retryable ``AUTH_FAILED`` with the API-key message."""
    adapted = adapt_llm_exception(
        _exception_named("AuthenticationError", "provider rejected credentials")
    )

    assert adapted.category is LLMErrorCategory.AUTH_FAILED
    assert adapted.retryable is False
    assert adapted.user_message == "LLM authentication failed. Check your API key."
def test_adapter_classifies_embedded_provider_401_payload() -> None:
    """A plain ``RuntimeError`` whose message embeds a provider JSON
    payload with ``code`` 401 must be adapted to ``AUTH_FAILED`` and
    expose that status code."""
    provider_message = 'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}'
    adapted = adapt_llm_exception(RuntimeError(provider_message))

    assert adapted.category is LLMErrorCategory.AUTH_FAILED
    assert adapted.provider_status_code == 401
def test_adapter_preserves_rate_limit_classification() -> None:
    """A message embedding a ``code`` 429 payload must be adapted to a
    retryable ``RATE_LIMITED`` error."""
    throttled = RuntimeError('{"error":{"message":"Slow down","code":429}}')
    adapted = adapt_llm_exception(throttled)

    assert adapted.category is LLMErrorCategory.RATE_LIMITED
    assert adapted.retryable is True
def test_stream_classifier_maps_model_auth_to_stable_code() -> None:
    """A provider 401 surfaced during streaming must classify as the
    expected, warn-level ``MODEL_AUTH_FAILED`` error and carry the
    provider category and status code in ``extra``."""
    auth_failure = RuntimeError(
        'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}'
    )

    classification = classify_stream_exception(auth_failure, flow_label="chat")
    kind, code, severity, expected, message, extra = classification

    assert kind == "model_auth_failed"
    assert code == "MODEL_AUTH_FAILED"
    assert severity == "warn"
    assert expected is True
    assert "API key" in message
    expected_extra = {
        "provider_error_category": "auth_failed",
        "provider_status_code": 401,
    }
    assert extra == expected_extra
def test_stream_classifier_keeps_unknown_errors_generic() -> None:
    """An unrecognised exception must stay a generic, unexpected
    ``SERVER_ERROR`` whose message includes the flow label and the
    original exception text, with no ``extra`` payload."""
    unknown_failure = RuntimeError("database exploded")

    classification = classify_stream_exception(unknown_failure, flow_label="chat")
    kind, code, severity, expected, message, extra = classification

    assert kind == "server_error"
    assert code == "SERVER_ERROR"
    assert severity == "error"
    assert expected is False
    assert message == "Error during chat: database exploded"
    assert extra is None

View file

@ -0,0 +1,61 @@
"""Unit tests for provider-safe LLM history normalization."""
from __future__ import annotations
import pytest
from app.tasks.chat.llm_history_normalizer import (
assistant_content_to_llm_text,
user_content_to_llm_content,
)
pytestmark = pytest.mark.unit
def test_assistant_ui_parts_drop_thinking_steps_for_llm_history() -> None:
    """A ``data-thinking-steps`` UI part must be dropped; only the text
    part's text is returned."""
    thinking_steps_part = {"type": "data-thinking-steps", "data": [{"id": "thinking-1"}]}
    text_part = {"type": "text", "text": "visible answer"}

    llm_text = assistant_content_to_llm_text([thinking_steps_part, text_part])

    assert llm_text == "visible answer"
def test_provider_thinking_blocks_are_not_replayed_to_llm() -> None:
    """A provider ``thinking`` block must not appear in the text that is
    fed back to the LLM."""
    thinking_block = {"type": "thinking", "thinking": "private reasoning"}
    text_block = {"type": "text", "text": "final answer"}

    llm_text = assistant_content_to_llm_text([thinking_block, text_block])

    assert llm_text == "final answer"
def test_unknown_assistant_blocks_are_dropped() -> None:
    """``redacted_thinking`` and ``tool_use`` blocks must be dropped,
    leaving only the text block's text."""
    blocks = [
        {"type": "redacted_thinking", "data": "hidden"},
        {"type": "tool_use", "name": "search"},
        {"type": "text", "text": "kept"},
    ]

    llm_text = assistant_content_to_llm_text(blocks)

    assert llm_text == "kept"
def test_user_images_convert_to_openai_compatible_image_url_blocks() -> None:
    """With ``allow_images=True`` an ``image`` part must become an
    ``image_url`` block wrapping the same data URL; text parts pass
    through unchanged."""
    user_parts = [
        {"type": "text", "text": "look"},
        {"type": "image", "image": "data:image/png;base64,abc"},
    ]

    converted = user_content_to_llm_content(user_parts, allow_images=True)

    expected_blocks = [
        {"type": "text", "text": "look"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
    ]
    assert converted == expected_blocks
def test_user_images_can_be_dropped_for_text_only_history() -> None:
    """With ``allow_images=False`` the image part must be dropped and the
    result is the plain text string."""
    user_parts = [
        {"type": "text", "text": "look"},
        {"type": "image", "image": "data:image/png;base64,abc"},
    ]

    text_only = user_content_to_llm_content(user_parts, allow_images=False)

    assert text_only == "look"

View file

@ -0,0 +1,67 @@
"""Unit tests for final assistant message part normalization."""
from __future__ import annotations
import pytest
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from app.tasks.chat.message_parts_normalizer import (
final_assistant_parts_from_messages,
merge_streamed_and_final_parts,
normalize_ai_message_to_parts,
)
pytestmark = pytest.mark.unit
def test_string_ai_message_content_becomes_text_part() -> None:
    """Plain string content must normalize to a single text part."""
    parts = normalize_ai_message_to_parts(AIMessage(content="hello"))

    assert parts == [{"type": "text", "text": "hello"}]
def test_deepseek_thinking_plus_text_blocks_backfill_only_text() -> None:
    """A message carrying a ``thinking`` block, a ``text`` block and
    ``reasoning_content`` in ``additional_kwargs`` must normalize to the
    text part alone."""
    hidden = "hidden reasoning"
    message = AIMessage(
        content=[
            {"type": "thinking", "thinking": hidden},
            {"type": "text", "text": "Yo bro! What's up?"},
        ],
        additional_kwargs={"reasoning_content": hidden},
    )

    parts = normalize_ai_message_to_parts(message)

    assert parts == [{"type": "text", "text": "Yo bro! What's up?"}]
def test_final_parts_use_last_ai_message_and_skip_trailing_tool_messages() -> None:
    """The final parts must come from the last ``AIMessage`` in the
    history, ignoring the earlier draft and the tool message after it."""
    history = [
        HumanMessage(content="ask"),
        AIMessage(content="draft"),
        ToolMessage(content="tool output", tool_call_id="tc-1"),
        AIMessage(content=[{"type": "text", "text": "final answer"}]),
        ToolMessage(content="trailing tool noise", tool_call_id="tc-2"),
    ]

    final_parts = final_assistant_parts_from_messages(history)

    assert final_parts == [{"type": "text", "text": "final answer"}]
def test_merge_adds_final_text_when_stream_only_has_thinking_steps() -> None:
    """When the streamed parts hold only thinking steps, the final text
    part must be appended after them."""
    thinking_steps = {
        "type": "data-thinking-steps",
        "data": [{"id": "thinking-1", "status": "completed"}],
    }
    streamed = [thinking_steps]
    final = [{"type": "text", "text": "visible answer"}]

    merged = merge_streamed_and_final_parts(streamed, final)

    assert merged == [*streamed, *final]
def test_merge_does_not_duplicate_when_stream_already_has_text() -> None:
    """When the stream already produced a text part, the merge must
    equal the streamed parts and not add the final text."""
    streamed = [{"type": "text", "text": "streamed answer"}]
    final = [{"type": "text", "text": "final answer"}]

    merged = merge_streamed_and_final_parts(streamed, final)

    assert merged == streamed

View file

@ -35,7 +35,7 @@ def test_safety_net_does_not_fire_for_azure_gpt_4o():
it text-only."""
assert (
is_known_text_only_chat_model(
provider="AZURE_OPENAI",
provider="azure",
model_name="my-azure-deployment",
base_model="gpt-4o",
)
@ -49,7 +49,7 @@ def test_safety_net_does_not_fire_for_unknown_model():
LiteLLM doesn't know about must flow through to the provider."""
assert (
is_known_text_only_chat_model(
provider="CUSTOM",
provider="custom",
custom_provider="brand_new_proxy",
model_name="brand-new-model-x9",
)
@ -69,7 +69,7 @@ def test_safety_net_does_not_fire_when_lookup_raises(monkeypatch):
assert (
is_known_text_only_chat_model(
provider="OPENAI",
provider="openai",
model_name="gpt-4o",
)
is False
@ -88,7 +88,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
monkeypatch.setattr(pc.litellm, "get_model_info", _info_explicit_false)
assert (
is_known_text_only_chat_model(
provider="OPENAI",
provider="openai",
model_name="text-only-stub",
)
is True
@ -100,7 +100,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
monkeypatch.setattr(pc.litellm, "get_model_info", _info_true)
assert (
is_known_text_only_chat_model(
provider="OPENAI",
provider="openai",
model_name="vision-stub",
)
is False
@ -112,7 +112,7 @@ def test_safety_net_fires_only_on_explicit_false(monkeypatch):
monkeypatch.setattr(pc.litellm, "get_model_info", _info_missing)
assert (
is_known_text_only_chat_model(
provider="OPENAI",
provider="openai",
model_name="missing-key-stub",
)
is False