feat(database-migrations): add migration to remove legacy model config tables and remove stale model connection code

This commit is contained in:
Anish Sarkar 2026-06-13 12:45:43 +05:30
parent 50668775f8
commit bd4a04f2e7
93 changed files with 956 additions and 11442 deletions

View file

@ -77,7 +77,7 @@ The walkthrough above is `--scenario head-to-head` (default): both arms answer w
| `symmetric-cheap` | `--provider-model` (cheap, text-only) | `--provider-model` (same) | Does pre-extracted image context let a non-vision LLM reason over image-heavy docs? |
| `cost-arbitrage` | `--native-arm-model` (vision) | `--provider-model` (cheap) | How close does SurfSense get to a vision-native baseline at a fraction of per-query cost?|
In all three modes the **ingest-time** vision LLM is set on the SearchSpace's `vision_llm_config_id` (auto-picked from the strongest registered global OpenRouter vision config — `claude-sonnet-4.5` > `claude-opus-4.7` > `gpt-5` > `gemini-2.5-pro`, override with `--vision-llm <slug>`). What changes is which slug the *answering* models hit per arm.
In all three modes the **ingest-time** vision LLM is set on the SearchSpace's `vision_model_id` (auto-picked from the strongest registered global OpenRouter vision-capable model — `claude-sonnet-4.5` > `claude-opus-4.7` > `gpt-5` > `gemini-2.5-pro`, override with `--vision-llm <slug>`). What changes is which slug the *answering* models hit per arm.
### Ingest with vision, evaluate with a non-vision LLM (`symmetric-cheap`)
@ -118,7 +118,7 @@ python -m surfsense_evals report --suite medical
Notes:
- `cost-arbitrage` requires both `--provider-model` (the cheap SurfSense slug) AND `--native-arm-model <vision slug>`.
- `--vision-llm <slug>` is optional; if omitted the harness queries `GET /api/v1/global-vision-llm-configs` and auto-picks the strongest registered one. Pass `--no-vision-llm-setup` if you want to keep whatever vision config is already attached to the SearchSpace.
- `--vision-llm <slug>` is optional; if omitted the harness queries `GET /api/v1/model-connections/global` and auto-picks the strongest registered vision-capable model. Pass `--no-vision-llm-setup` if you want to keep whatever vision model is already attached to the SearchSpace.
- The runner's "looks text-only" warning is suppressed (or relabelled as informational) for `symmetric-cheap` so intentional asymmetry doesn't read as a misconfiguration.
- All four scenario fields (`scenario`, `provider_model`, `native_arm_model`, `vision_provider_model`) are persisted to `state.json` and recorded in `run_artifact.extra` + the report header — no need to retrace what was set.

View file

@ -9,7 +9,7 @@
"llamacloud_premium_lc",
"surfsense_agentic"
],
"agent_llm_id": -5138454,
"chat_model_id": -5138454,
"concurrency": 2,
"llm_model": "anthropic/claude-sonnet-4.5",
"n_pdfs": 30,

View file

@ -2,7 +2,7 @@
Subcommands:
* ``setup --suite <name> --provider-model <slug> [--agent-llm-id <int>]``
* ``setup --suite <name> --provider-model <slug> [--chat-model-id <int>]``
* ``teardown --suite <name>``
* ``models list [--provider openrouter] [--grep <s>]``
* ``suites list``
@ -18,7 +18,7 @@ publish its own flags.
Design choices worth flagging:
* ``setup`` rejects ``agent_llm_id == 0`` (Auto / LiteLLM router) so
* ``setup`` rejects ``chat_model_id == 0`` (Auto / LiteLLM router) so
per-question accuracy is reproducible.
* ``setup`` validates that the picked LLM config has
``provider == "OPENROUTER"`` and ``model_name == --provider-model``
@ -59,7 +59,6 @@ if sys.platform == "win32":
from . import registry
from .auth import CredentialError, acquire_token, client_with_auth
from .clients import SearchSpaceClient
from .clients.search_space import LlmPreferences
from .config import (
DEFAULT_SCENARIO,
SCENARIOS,
@ -111,23 +110,30 @@ class LlmConfigEntry:
def from_payload(cls, payload: dict[str, Any]) -> LlmConfigEntry:
return cls(
id=int(payload["id"]),
name=str(payload.get("name", "")),
name=str(payload.get("display_name") or payload.get("name") or ""),
provider=str(payload.get("provider", "")).upper(),
model_name=str(payload.get("model_name", "")),
model_name=str(payload.get("model_id") or payload.get("model_name") or ""),
raw=payload,
)
async def _list_global_llm_configs(http: httpx.AsyncClient, base: str) -> list[LlmConfigEntry]:
response = await http.get(
f"{base}/api/v1/global-new-llm-configs",
f"{base}/api/v1/model-connections/global",
headers={"Accept": "application/json"},
)
response.raise_for_status()
payload = response.json()
if not isinstance(payload, list):
raise RuntimeError(f"Unexpected /global-new-llm-configs payload: {payload!r}")
return [LlmConfigEntry.from_payload(item) for item in payload]
raise RuntimeError(f"Unexpected /model-connections/global payload: {payload!r}")
entries: list[LlmConfigEntry] = []
for connection in payload:
provider = connection.get("provider", "")
for model in connection.get("models") or []:
if not model.get("enabled", True) or not model.get("supports_chat"):
continue
entries.append(LlmConfigEntry.from_payload({**model, "provider": provider}))
return entries
def _resolve_openrouter_id(
@ -143,8 +149,8 @@ def _resolve_openrouter_id(
* If ``explicit_id`` is given: return it directly. The caller is
then expected to GET-validate that the row's
``provider == "OPENROUTER"`` and ``model_name`` matches the slug.
That branch supports positive BYOK ``NewLLMConfig`` rows whose
slugs may overlap with global OpenRouter virtuals.
That branch supports positive BYOK model rows whose slugs may overlap
with global OpenRouter virtuals.
* Otherwise: filter to ``provider == "OPENROUTER"`` and
``model_name == provider_model``. Expect exactly one match —
raise with a friendly message otherwise.
@ -173,7 +179,7 @@ def _resolve_openrouter_id(
listing = "\n".join(f" id={c.id} name={c.name!r}" for c in matches)
raise RuntimeError(
f"Multiple OpenRouter configs for slug '{provider_model}':\n{listing}\n"
"Pass --agent-llm-id <id> to disambiguate."
"Pass --chat-model-id <id> to disambiguate."
)
return matches[0].id
@ -186,7 +192,7 @@ def _resolve_openrouter_id(
async def _cmd_setup(args: argparse.Namespace) -> int:
suite = args.suite
provider_model: str = args.provider_model
explicit_id: int | None = args.agent_llm_id
explicit_id: int | None = args.chat_model_id
scenario: str = args.scenario
vision_llm_slug: str | None = args.vision_llm
native_arm_model: str | None = args.native_arm_model
@ -194,7 +200,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
if explicit_id == 0:
console.print(
"[red]agent_llm_id == 0 (Auto / LiteLLM router) is not allowed — "
"[red]chat_model_id == 0 (Auto / LiteLLM router) is not allowed — "
"results would not be reproducible.[/red]"
)
return 2
@ -242,7 +248,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
candidates = await _list_global_llm_configs(http, config.surfsense_api_base)
try:
agent_llm_id = _resolve_openrouter_id(
chat_model_id = _resolve_openrouter_id(
candidates, provider_model, explicit_id=explicit_id
)
except RuntimeError as exc:
@ -288,7 +294,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
vision_provider_model: str | None = None
if not skip_vision_setup and (vision_required or vision_llm_slug is not None):
try:
vision_candidates = await ss_client.list_global_vision_llm_configs()
vision_candidates = await ss_client.list_global_vision_models()
resolved = resolve_vision_llm(
vision_candidates, explicit_slug=vision_llm_slug
)
@ -302,37 +308,34 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
f"(id={vision_config_id}, selected_via={resolved.selected_via})."
)
pref_kwargs: dict[str, Any] = {"agent_llm_id": agent_llm_id}
role_kwargs: dict[str, Any] = {"chat_model_id": chat_model_id}
if vision_config_id is not None:
pref_kwargs["vision_llm_config_id"] = vision_config_id
role_kwargs["vision_model_id"] = vision_config_id
await ss_client.set_llm_preferences(search_space_id, **pref_kwargs)
prefs = await ss_client.get_llm_preferences(search_space_id)
if not _validate_pin(prefs, provider_model):
agent = prefs.agent_llm or {}
await ss_client.set_model_roles(search_space_id, **role_kwargs)
roles = await ss_client.get_model_roles(search_space_id)
if roles.chat_model_id != chat_model_id:
console.print(
f"[red]LLM pin validation FAILED.[/red] After PUT, "
f"agent_llm.provider={agent.get('provider')!r}, "
f"model_name={agent.get('model_name')!r}; expected "
f"provider=OPENROUTER, model_name={provider_model!r}."
f"chat_model_id={roles.chat_model_id!r}; expected {chat_model_id!r}."
)
return 2
if vision_config_id is not None and prefs.vision_llm_config_id != vision_config_id:
if vision_config_id is not None and roles.vision_model_id != vision_config_id:
console.print(
f"[red]Vision LLM pin validation FAILED.[/red] After PUT, "
f"vision_llm_config_id={prefs.vision_llm_config_id!r}; "
f"vision_model_id={roles.vision_model_id!r}; "
f"expected {vision_config_id!r}."
)
return 2
suite_state = SuiteState(
search_space_id=search_space_id,
agent_llm_id=agent_llm_id,
chat_model_id=chat_model_id,
provider_model=provider_model,
created_at=utc_iso_timestamp(),
ingestion_maps=existing.ingestion_maps if existing else {},
scenario=scenario,
vision_llm_config_id=vision_config_id,
vision_model_id=vision_config_id,
vision_provider_model=vision_provider_model,
native_arm_model=native_arm_model,
)
@ -342,7 +345,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
f"suite={suite!r}",
f"scenario={scenario!r}",
f"search_space_id={suite_state.search_space_id}",
f"agent_llm_id={suite_state.agent_llm_id}",
f"chat_model_id={suite_state.chat_model_id}",
f"provider_model={suite_state.provider_model!r}",
]
if suite_state.vision_provider_model:
@ -353,14 +356,6 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
return 0
def _validate_pin(prefs: LlmPreferences, provider_model: str) -> bool:
agent = prefs.agent_llm or {}
return (
str(agent.get("provider", "")).upper() == "OPENROUTER"
and str(agent.get("model_name", "")) == provider_model
)
async def _cmd_teardown(args: argparse.Namespace) -> int:
suite = args.suite
config = load_config()
@ -654,10 +649,10 @@ def _build_parser() -> argparse.ArgumentParser:
),
)
p_setup.add_argument(
"--agent-llm-id",
"--chat-model-id",
type=int,
default=None,
help="Optional override for BYOK NewLLMConfig rows.",
help="Optional explicit model id override.",
)
p_setup.add_argument(
"--scenario",

View file

@ -1,17 +1,16 @@
"""Client for ``/api/v1/searchspaces`` and ``/api/v1/search-spaces/{id}/llm-preferences``.
"""Client for ``/api/v1/searchspaces`` and model-role endpoints.
Verified against:
* ``surfsense_backend/app/routes/search_spaces_routes.py:116`` (POST create)
* ``surfsense_backend/app/routes/search_spaces_routes.py:234`` (GET by id)
* ``surfsense_backend/app/routes/search_spaces_routes.py:422`` (DELETE soft-delete)
* ``surfsense_backend/app/routes/search_spaces_routes.py:698-849`` (GET/PUT llm-preferences)
* ``surfsense_backend/app/routes/model_connections_routes.py`` (GET/PUT model roles)
* ``surfsense_backend/app/schemas/search_space.py:14`` (SearchSpaceCreate body)
* ``surfsense_backend/app/routes/vision_llm_routes.py:60`` (GET global vision configs)
Note the inconsistent pluralisation in the backend: ``/searchspaces``
(no hyphen) for CRUD, but ``/search-spaces`` (hyphenated) for the
``llm-preferences`` sub-resource. Both are mirrored verbatim here.
(no hyphen) for CRUD, but ``/search-spaces`` (hyphenated) for model-role
sub-resources. Both are mirrored verbatim here.
"""
from __future__ import annotations
@ -46,13 +45,8 @@ class SearchSpaceRow:
@dataclass
class VisionLlmConfigEntry:
"""Subset of one ``GET /global-vision-llm-configs`` row.
The backend returns negative ids for global / OpenRouter-derived
vision configs and positive ids for per-user BYOK rows. Either is
accepted by ``set_llm_preferences(vision_llm_config_id=...)``.
"""
class VisionModelEntry:
"""Subset of one GLOBAL model-connection model with image input support."""
id: int
name: str
@ -62,45 +56,38 @@ class VisionLlmConfigEntry:
raw: dict[str, Any]
@classmethod
def from_payload(cls, payload: dict[str, Any]) -> VisionLlmConfigEntry:
def from_payload(cls, payload: dict[str, Any]) -> VisionModelEntry:
return cls(
id=int(payload.get("id", 0)),
name=str(payload.get("name", "")),
name=str(payload.get("display_name") or payload.get("model_id") or ""),
provider=str(payload.get("provider", "")).upper(),
model_name=str(payload.get("model_name", "")),
is_auto_mode=bool(payload.get("is_auto_mode", False)),
model_name=str(payload.get("model_id", "")),
is_auto_mode=False,
raw=payload,
)
@dataclass
class LlmPreferences:
"""Resolved LLM preferences with the embedded full config row.
class ModelRoles:
"""Model role ids for a search space."""
Mirrors ``LLMPreferencesRead`` from the backend so the lifecycle
command can introspect ``provider`` / ``model_name`` to validate the
OpenRouter pin.
"""
agent_llm_id: int | None
image_generation_config_id: int | None
vision_llm_config_id: int | None
agent_llm: dict[str, Any] | None
chat_model_id: int | None
image_gen_model_id: int | None
vision_model_id: int | None
raw: dict[str, Any]
@classmethod
def from_payload(cls, payload: dict[str, Any]) -> LlmPreferences:
def from_payload(cls, payload: dict[str, Any]) -> ModelRoles:
return cls(
agent_llm_id=payload.get("agent_llm_id"),
image_generation_config_id=payload.get("image_generation_config_id"),
vision_llm_config_id=payload.get("vision_llm_config_id"),
agent_llm=payload.get("agent_llm"),
chat_model_id=payload.get("chat_model_id"),
image_gen_model_id=payload.get("image_gen_model_id"),
vision_model_id=payload.get("vision_model_id"),
raw=payload,
)
class SearchSpaceClient:
"""Thin wrapper around the SearchSpace + LLM preferences endpoints."""
"""Thin wrapper around the SearchSpace + model role endpoints."""
def __init__(self, http: httpx.AsyncClient, base_url: str) -> None:
self._http = http
@ -139,64 +126,67 @@ class SearchSpaceClient:
return
response.raise_for_status()
async def get_llm_preferences(self, search_space_id: int) -> LlmPreferences:
async def get_model_roles(self, search_space_id: int) -> ModelRoles:
response = await self._http.get(
f"{self._base}/api/v1/search-spaces/{search_space_id}/llm-preferences",
f"{self._base}/api/v1/search-spaces/{search_space_id}/model-roles",
headers={"Accept": "application/json"},
)
response.raise_for_status()
return LlmPreferences.from_payload(response.json())
return ModelRoles.from_payload(response.json())
async def set_llm_preferences(
async def set_model_roles(
self,
search_space_id: int,
*,
agent_llm_id: int | None = None,
image_generation_config_id: int | None = None,
vision_llm_config_id: int | None = None,
) -> LlmPreferences:
"""PUT a partial update to ``/search-spaces/{id}/llm-preferences``.
chat_model_id: int | None = None,
image_gen_model_id: int | None = None,
vision_model_id: int | None = None,
) -> ModelRoles:
"""PUT a partial update to ``/search-spaces/{id}/model-roles``.
Backend uses ``model_dump(exclude_unset=True)`` so omitted fields
are left unchanged.
"""
body: dict[str, Any] = {}
if agent_llm_id is not None:
body["agent_llm_id"] = agent_llm_id
if image_generation_config_id is not None:
body["image_generation_config_id"] = image_generation_config_id
if vision_llm_config_id is not None:
body["vision_llm_config_id"] = vision_llm_config_id
if chat_model_id is not None:
body["chat_model_id"] = chat_model_id
if image_gen_model_id is not None:
body["image_gen_model_id"] = image_gen_model_id
if vision_model_id is not None:
body["vision_model_id"] = vision_model_id
response = await self._http.put(
f"{self._base}/api/v1/search-spaces/{search_space_id}/llm-preferences",
f"{self._base}/api/v1/search-spaces/{search_space_id}/model-roles",
json=body,
headers={"Accept": "application/json"},
)
response.raise_for_status()
return LlmPreferences.from_payload(response.json())
return ModelRoles.from_payload(response.json())
async def list_global_vision_llm_configs(self) -> list[VisionLlmConfigEntry]:
"""List the registered global vision LLM configs.
async def list_global_vision_models(self) -> list[VisionModelEntry]:
"""List registered GLOBAL models that can accept image input.
Used by ``setup`` to (a) resolve an explicit ``--vision-llm <slug>``
to a config id and (b) auto-pick the strongest registered vision
config when the operator doesn't pass one. The ``Auto (Fastest)``
entry (``id=0``) is filtered out — accuracy must be reproducible.
Used by ``setup`` to resolve ``--vision-llm <slug>`` or auto-pick a
reproducible ingest-time vision model.
"""
response = await self._http.get(
f"{self._base}/api/v1/global-vision-llm-configs",
f"{self._base}/api/v1/model-connections/global",
headers={"Accept": "application/json"},
)
response.raise_for_status()
payload = response.json()
if not isinstance(payload, list):
raise RuntimeError(
f"Unexpected /global-vision-llm-configs payload: {payload!r}"
f"Unexpected /model-connections/global payload: {payload!r}"
)
return [
VisionLlmConfigEntry.from_payload(item)
for item in payload
if not bool(item.get("is_auto_mode", False))
]
entries: list[VisionModelEntry] = []
for connection in payload:
provider = str(connection.get("provider", ""))
for model in connection.get("models") or []:
if not model.get("enabled", True) or not model.get("supports_image_input"):
continue
entries.append(
VisionModelEntry.from_payload({**model, "provider": provider})
)
return entries

View file

@ -147,35 +147,35 @@ class SuiteState:
"""Per-suite persisted state.
``provider_model`` is the slug pinned to the SearchSpace's
``agent_llm`` — what answers SurfSense queries (and what the native
``chat_model_id`` — what answers SurfSense queries (and what the native
arm uses too, unless ``native_arm_model`` is set for cost-arbitrage).
``vision_provider_model`` is the slug of the OpenRouter vision LLM
config attached to the SearchSpace's ``vision_llm_config_id`` — what
``vision_provider_model`` is the slug of the OpenRouter vision model
attached to the SearchSpace's ``vision_model_id`` — what
SurfSense uses to extract image content at ingest time when
``use_vision_llm=True``. ``None`` means no vision config was attached
at setup (legacy or text-only suite).
"""
search_space_id: int
agent_llm_id: int
chat_model_id: int
provider_model: str
created_at: str
ingestion_maps: dict[str, str] = field(default_factory=dict)
scenario: str = DEFAULT_SCENARIO
vision_llm_config_id: int | None = None
vision_model_id: int | None = None
vision_provider_model: str | None = None
native_arm_model: str | None = None
def to_dict(self) -> dict[str, Any]:
return {
"search_space_id": self.search_space_id,
"agent_llm_id": self.agent_llm_id,
"chat_model_id": self.chat_model_id,
"provider_model": self.provider_model,
"created_at": self.created_at,
"ingestion_maps": dict(self.ingestion_maps),
"scenario": self.scenario,
"vision_llm_config_id": self.vision_llm_config_id,
"vision_model_id": self.vision_model_id,
"vision_provider_model": self.vision_provider_model,
"native_arm_model": self.native_arm_model,
}
@ -187,15 +187,16 @@ class SuiteState:
scenario = str(payload.get("scenario") or DEFAULT_SCENARIO)
if scenario not in SCENARIOS:
scenario = DEFAULT_SCENARIO
raw_vision_id = payload.get("vision_llm_config_id")
raw_chat_id = payload.get("chat_model_id")
raw_vision_id = payload.get("vision_model_id")
return cls(
search_space_id=int(payload["search_space_id"]),
agent_llm_id=int(payload["agent_llm_id"]),
chat_model_id=int(raw_chat_id),
provider_model=str(payload["provider_model"]),
created_at=str(payload.get("created_at") or ""),
ingestion_maps=dict(payload.get("ingestion_maps") or {}),
scenario=scenario,
vision_llm_config_id=int(raw_vision_id) if raw_vision_id is not None else None,
vision_model_id=int(raw_vision_id) if raw_vision_id is not None else None,
vision_provider_model=(
str(payload["vision_provider_model"])
if payload.get("vision_provider_model")

View file

@ -53,8 +53,8 @@ class RunContext:
return self.suite_state.search_space_id
@property
def agent_llm_id(self) -> int:
return self.suite_state.agent_llm_id
def chat_model_id(self) -> int:
return self.suite_state.chat_model_id
@property
def provider_model(self) -> str:

View file

@ -3,8 +3,8 @@
Two responsibilities:
1. Resolve an explicit ``--vision-llm <slug>`` to a global OpenRouter
vision LLM config id that ``set_llm_preferences(vision_llm_config_id=...)``
can accept.
vision-capable model id that ``set_model_roles(vision_model_id=...)`` can
accept.
2. Auto-pick the strongest registered vision config when the operator
doesn't pass ``--vision-llm`` but the scenario / benchmark needs one.

View file

@ -371,7 +371,7 @@ class MedXpertQAMMBenchmark:
"provider_model": ctx.provider_model,
"native_arm_model": native_arm_model,
"vision_provider_model": ctx.vision_provider_model,
"agent_llm_id": ctx.agent_llm_id,
"chat_model_id": ctx.chat_model_id,
"ingest_settings": ingest_settings,
},
)

View file

@ -391,7 +391,7 @@ class MMLongBenchDocBenchmark:
"provider_model": ctx.provider_model,
"native_arm_model": native_arm_model,
"vision_provider_model": ctx.vision_provider_model,
"agent_llm_id": ctx.agent_llm_id,
"chat_model_id": ctx.chat_model_id,
"ingest_settings": ingest_settings,
},
)

View file

@ -554,7 +554,7 @@ class ParserCompareBenchmark:
"scenario": ctx.scenario,
"provider_model": ctx.provider_model,
"vision_provider_model": ctx.vision_provider_model,
"agent_llm_id": ctx.agent_llm_id,
"chat_model_id": ctx.chat_model_id,
"preprocess_tariff": {
"basic_per_1k_pages": 1.0,
"premium_per_1k_pages": 10.0,

View file

@ -467,7 +467,7 @@ class CragBenchmark:
"provider_model": ctx.provider_model,
"native_arm_model": ctx.native_arm_model,
"vision_provider_model": ctx.vision_provider_model,
"agent_llm_id": ctx.agent_llm_id,
"chat_model_id": ctx.chat_model_id,
"ingest_settings": ingest_settings,
"per_page_char_cap": per_page_char_cap,
"max_output_tokens": max_output_tokens,

View file

@ -372,7 +372,7 @@ class FramesBenchmark:
"provider_model": ctx.provider_model,
"native_arm_model": ctx.native_arm_model,
"vision_provider_model": ctx.vision_provider_model,
"agent_llm_id": ctx.agent_llm_id,
"chat_model_id": ctx.chat_model_id,
"ingest_settings": ingest_settings,
"bare_arm_label": "bare_llm",
},

View file

@ -63,29 +63,22 @@ async def test_delete_search_space_idempotent_on_404(respx_mock, http):
@pytest.mark.asyncio
@respx.mock(base_url=_BASE)
async def test_set_llm_preferences_partial_update(respx_mock, http):
route = respx_mock.put("/api/v1/search-spaces/42/llm-preferences").mock(
async def test_set_model_roles_partial_update(respx_mock, http):
route = respx_mock.put("/api/v1/search-spaces/42/model-roles").mock(
return_value=httpx.Response(
200,
json={
"agent_llm_id": -10042,
"agent_llm_id": None,
"image_generation_config_id": None,
"vision_llm_config_id": None,
"agent_llm": {
"id": -10042,
"provider": "OPENROUTER",
"model_name": "anthropic/claude-sonnet-4.5",
},
"chat_model_id": -10042,
"image_gen_model_id": None,
"vision_model_id": None,
},
)
)
client = SearchSpaceClient(http, _BASE)
prefs = await client.set_llm_preferences(42, agent_llm_id=-10042)
assert prefs.agent_llm_id == -10042
assert prefs.agent_llm["provider"] == "OPENROUTER"
roles = await client.set_model_roles(42, chat_model_id=-10042)
assert roles.chat_model_id == -10042
sent_body = json.loads(route.calls[-1].request.content)
assert sent_body == {"agent_llm_id": -10042}
assert sent_body == {"chat_model_id": -10042}
# ---------------------------------------------------------------------------

View file

@ -41,14 +41,14 @@ def test_state_roundtrip_per_suite(tmp_env): # noqa: ARG001
assert get_suite_state(config, "medical") is None
state = SuiteState(
search_space_id=1,
agent_llm_id=-10042,
chat_model_id=-10042,
provider_model="anthropic/claude-sonnet-4.5",
created_at="2026-05-11T20-30-00Z",
)
set_suite_state(config, "medical", state)
legal = SuiteState(
search_space_id=2,
agent_llm_id=-1,
chat_model_id=-1,
provider_model="openai/gpt-5",
created_at="2026-05-11T21-00-00Z",
)
@ -84,25 +84,19 @@ def test_paths_are_per_suite(tmp_env): # noqa: ARG001
# ---------------------------------------------------------------------------
def test_legacy_state_back_compat_defaults_to_head_to_head():
"""state.json files written before scenarios shipped must still load.
def test_minimal_state_defaults_to_head_to_head():
"""Missing scenario / vision / native fields default safely."""
Missing ``scenario`` / ``vision_*`` / ``native_arm_model`` keys all
default to ``head-to-head`` / ``None`` so old setups keep working
after upgrade — the runner's behaviour exactly mirrors the legacy
one (both arms answer with ``provider_model``).
"""
legacy = {
payload = {
"search_space_id": 7,
"agent_llm_id": -123,
"chat_model_id": -123,
"provider_model": "anthropic/claude-sonnet-4.5",
"created_at": "2026-05-11T20-30-00Z",
"ingestion_maps": {},
}
state = SuiteState.from_dict(legacy)
state = SuiteState.from_dict(payload)
assert state.scenario == DEFAULT_SCENARIO == "head-to-head"
assert state.vision_llm_config_id is None
assert state.vision_model_id is None
assert state.vision_provider_model is None
assert state.native_arm_model is None
# The native arm should still answer with the same slug as SurfSense.
@ -118,7 +112,7 @@ def test_unknown_scenario_falls_back_to_default():
payload = {
"search_space_id": 1,
"agent_llm_id": -1,
"chat_model_id": -1,
"provider_model": "openai/gpt-5",
"scenario": "unknown-scenario-name",
}
@ -130,11 +124,11 @@ def test_cost_arbitrage_state_persists_native_arm_model(tmp_env): # noqa: ARG00
config = load_config()
state = SuiteState(
search_space_id=42,
agent_llm_id=-1,
chat_model_id=-1,
provider_model="openai/gpt-5.4-mini",
created_at="2026-05-11T20-30-00Z",
scenario="cost-arbitrage",
vision_llm_config_id=-101,
vision_model_id=-101,
vision_provider_model="anthropic/claude-sonnet-4.5",
native_arm_model="anthropic/claude-sonnet-4.5",
)
@ -142,7 +136,7 @@ def test_cost_arbitrage_state_persists_native_arm_model(tmp_env): # noqa: ARG00
fetched = get_suite_state(config, "medical")
assert fetched.scenario == "cost-arbitrage"
assert fetched.vision_llm_config_id == -101
assert fetched.vision_model_id == -101
assert fetched.vision_provider_model == "anthropic/claude-sonnet-4.5"
assert fetched.native_arm_model == "anthropic/claude-sonnet-4.5"
# Cost arbitrage's whole point: native arm slug != surfsense slug.

View file

@ -27,7 +27,7 @@ async def test_smoke_against_localhost():
pytest.skip("No credentials in environment; skipping integration smoke")
bundle = await acquire_token(config)
async with client_with_auth(config, bundle) as client:
response = await client.get(f"{config.surfsense_api_base}/api/v1/global-new-llm-configs")
response = await client.get(f"{config.surfsense_api_base}/api/v1/model-connections/global")
try:
response.raise_for_status()
except httpx.HTTPStatusError as exc: