feat(database-migrations): add migration to remove legacy model config tables and remove stale model connection code

2026-06-16 21:05:20 +02:00 · 2026-06-13 12:45:43 +05:30 · 2026-06-13 12:45:43 +05:30 · bd4a04f2e7
commit bd4a04f2e7
parent 50668775f8
93 changed files with 956 additions and 11442 deletions
--- a/surfsense_evals/README.md
+++ b/surfsense_evals/README.md
@ -77,7 +77,7 @@ The walkthrough above is `--scenario head-to-head` (default): both arms answer w
 | `symmetric-cheap`  | `--provider-model` (cheap, text-only)  | `--provider-model` (same)      | Does pre-extracted image context let a non-vision LLM reason over image-heavy docs?      |
 | `cost-arbitrage`   | `--native-arm-model` (vision)          | `--provider-model` (cheap)     | How close does SurfSense get to a vision-native baseline at a fraction of per-query cost?|

-In all three modes the **ingest-time** vision LLM is set on the SearchSpace's `vision_llm_config_id` (auto-picked from the strongest registered global OpenRouter vision config — `claude-sonnet-4.5` > `claude-opus-4.7` > `gpt-5` > `gemini-2.5-pro`, override with `--vision-llm <slug>`). What changes is which slug the *answering* models hit per arm.
+In all three modes the **ingest-time** vision LLM is set on the SearchSpace's `vision_model_id` (auto-picked from the strongest registered global OpenRouter vision-capable model — `claude-sonnet-4.5` > `claude-opus-4.7` > `gpt-5` > `gemini-2.5-pro`, override with `--vision-llm <slug>`). What changes is which slug the *answering* models hit per arm.

 ### Ingest with vision, evaluate with a non-vision LLM (`symmetric-cheap`)

@ -118,7 +118,7 @@ python -m surfsense_evals report --suite medical

 Notes:
 - `cost-arbitrage` requires both `--provider-model` (the cheap SurfSense slug) AND `--native-arm-model <vision slug>`.
- `--vision-llm <slug>` is optional; if omitted the harness queries `GET /api/v1/global-vision-llm-configs` and auto-picks the strongest registered one. Pass `--no-vision-llm-setup` if you want to keep whatever vision config is already attached to the SearchSpace.
+- `--vision-llm <slug>` is optional; if omitted the harness queries `GET /api/v1/model-connections/global` and auto-picks the strongest registered vision-capable model. Pass `--no-vision-llm-setup` if you want to keep whatever vision model is already attached to the SearchSpace.
 - The runner's "looks text-only" warning is suppressed (or relabelled as informational) for `symmetric-cheap` so intentional asymmetry doesn't read as a misconfiguration.
 - All four scenario fields (`scenario`, `provider_model`, `native_arm_model`, `vision_provider_model`) are persisted to `state.json` and recorded in `run_artifact.extra` + the report header — no need to retrace what was set.

--- a/surfsense_evals/data/multimodal_doc/runs/2026-05-14T00-53-19Z/parser_compare/run_artifact.json
+++ b/surfsense_evals/data/multimodal_doc/runs/2026-05-14T00-53-19Z/parser_compare/run_artifact.json
@ -9,7 +9,7 @@
      "llamacloud_premium_lc",
      "surfsense_agentic"
    ],
-    "agent_llm_id": -5138454,
+    "chat_model_id": -5138454,
    "concurrency": 2,
    "llm_model": "anthropic/claude-sonnet-4.5",
    "n_pdfs": 30,
--- a/surfsense_evals/src/surfsense_evals/core/cli.py
+++ b/surfsense_evals/src/surfsense_evals/core/cli.py
@ -2,7 +2,7 @@

 Subcommands:

-* ``setup    --suite <name> --provider-model <slug> [--agent-llm-id <int>]``
+* ``setup    --suite <name> --provider-model <slug> [--chat-model-id <int>]``
 * ``teardown --suite <name>``
 * ``models  list [--provider openrouter] [--grep <s>]``
 * ``suites  list``
@ -18,7 +18,7 @@ publish its own flags.

 Design choices worth flagging:

-* ``setup`` rejects ``agent_llm_id == 0`` (Auto / LiteLLM router) so
+* ``setup`` rejects ``chat_model_id == 0`` (Auto / LiteLLM router) so
  per-question accuracy is reproducible.
 * ``setup`` validates that the picked LLM config has
  ``provider == "OPENROUTER"`` and ``model_name == --provider-model``
@ -59,7 +59,6 @@ if sys.platform == "win32":
 from . import registry
 from .auth import CredentialError, acquire_token, client_with_auth
 from .clients import SearchSpaceClient
-from .clients.search_space import LlmPreferences
 from .config import (
    DEFAULT_SCENARIO,
    SCENARIOS,
@ -111,23 +110,30 @@ class LlmConfigEntry:
    def from_payload(cls, payload: dict[str, Any]) -> LlmConfigEntry:
        return cls(
            id=int(payload["id"]),
-            name=str(payload.get("name", "")),
+            name=str(payload.get("display_name") or payload.get("name") or ""),
            provider=str(payload.get("provider", "")).upper(),
-            model_name=str(payload.get("model_name", "")),
+            model_name=str(payload.get("model_id") or payload.get("model_name") or ""),
            raw=payload,
        )


 async def _list_global_llm_configs(http: httpx.AsyncClient, base: str) -> list[LlmConfigEntry]:
    response = await http.get(
-        f"{base}/api/v1/global-new-llm-configs",
+        f"{base}/api/v1/model-connections/global",
        headers={"Accept": "application/json"},
    )
    response.raise_for_status()
    payload = response.json()
    if not isinstance(payload, list):
-        raise RuntimeError(f"Unexpected /global-new-llm-configs payload: {payload!r}")
-    return [LlmConfigEntry.from_payload(item) for item in payload]
+        raise RuntimeError(f"Unexpected /model-connections/global payload: {payload!r}")
+    entries: list[LlmConfigEntry] = []
+    for connection in payload:
+        provider = connection.get("provider", "")
+        for model in connection.get("models") or []:
+            if not model.get("enabled", True) or not model.get("supports_chat"):
+                continue
+            entries.append(LlmConfigEntry.from_payload({**model, "provider": provider}))
+    return entries


 def _resolve_openrouter_id(
@ -143,8 +149,8 @@ def _resolve_openrouter_id(
    * If ``explicit_id`` is given: return it directly. The caller is
      then expected to GET-validate that the row's
      ``provider == "OPENROUTER"`` and ``model_name`` matches the slug.
-      That branch supports positive BYOK ``NewLLMConfig`` rows whose
-      slugs may overlap with global OpenRouter virtuals.
+      That branch supports positive BYOK model rows whose slugs may overlap
+      with global OpenRouter virtuals.
    * Otherwise: filter to ``provider == "OPENROUTER"`` and
      ``model_name == provider_model``. Expect exactly one match —
      raise with a friendly message otherwise.
@ -173,7 +179,7 @@ def _resolve_openrouter_id(
        listing = "\n".join(f"  id={c.id}  name={c.name!r}" for c in matches)
        raise RuntimeError(
            f"Multiple OpenRouter configs for slug '{provider_model}':\n{listing}\n"
-            "Pass --agent-llm-id <id> to disambiguate."
+            "Pass --chat-model-id <id> to disambiguate."
        )
    return matches[0].id

@ -186,7 +192,7 @@ def _resolve_openrouter_id(
 async def _cmd_setup(args: argparse.Namespace) -> int:
    suite = args.suite
    provider_model: str = args.provider_model
-    explicit_id: int | None = args.agent_llm_id
+    explicit_id: int | None = args.chat_model_id
    scenario: str = args.scenario
    vision_llm_slug: str | None = args.vision_llm
    native_arm_model: str | None = args.native_arm_model
@ -194,7 +200,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:

    if explicit_id == 0:
        console.print(
-            "[red]agent_llm_id == 0 (Auto / LiteLLM router) is not allowed — "
+            "[red]chat_model_id == 0 (Auto / LiteLLM router) is not allowed — "
            "results would not be reproducible.[/red]"
        )
        return 2
@ -242,7 +248,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
        candidates = await _list_global_llm_configs(http, config.surfsense_api_base)

        try:
-            agent_llm_id = _resolve_openrouter_id(
+            chat_model_id = _resolve_openrouter_id(
                candidates, provider_model, explicit_id=explicit_id
            )
        except RuntimeError as exc:
@ -288,7 +294,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
        vision_provider_model: str | None = None
        if not skip_vision_setup and (vision_required or vision_llm_slug is not None):
            try:
-                vision_candidates = await ss_client.list_global_vision_llm_configs()
+                vision_candidates = await ss_client.list_global_vision_models()
                resolved = resolve_vision_llm(
                    vision_candidates, explicit_slug=vision_llm_slug
                )
@ -302,37 +308,34 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
                f"(id={vision_config_id}, selected_via={resolved.selected_via})."
            )

-        pref_kwargs: dict[str, Any] = {"agent_llm_id": agent_llm_id}
+        role_kwargs: dict[str, Any] = {"chat_model_id": chat_model_id}
        if vision_config_id is not None:
-            pref_kwargs["vision_llm_config_id"] = vision_config_id
+            role_kwargs["vision_model_id"] = vision_config_id

-        await ss_client.set_llm_preferences(search_space_id, **pref_kwargs)
-        prefs = await ss_client.get_llm_preferences(search_space_id)
-        if not _validate_pin(prefs, provider_model):
-            agent = prefs.agent_llm or {}
+        await ss_client.set_model_roles(search_space_id, **role_kwargs)
+        roles = await ss_client.get_model_roles(search_space_id)
+        if roles.chat_model_id != chat_model_id:
            console.print(
                f"[red]LLM pin validation FAILED.[/red] After PUT, "
-                f"agent_llm.provider={agent.get('provider')!r}, "
-                f"model_name={agent.get('model_name')!r}; expected "
-                f"provider=OPENROUTER, model_name={provider_model!r}."
+                f"chat_model_id={roles.chat_model_id!r}; expected {chat_model_id!r}."
            )
            return 2
-        if vision_config_id is not None and prefs.vision_llm_config_id != vision_config_id:
+        if vision_config_id is not None and roles.vision_model_id != vision_config_id:
            console.print(
                f"[red]Vision LLM pin validation FAILED.[/red] After PUT, "
-                f"vision_llm_config_id={prefs.vision_llm_config_id!r}; "
+                f"vision_model_id={roles.vision_model_id!r}; "
                f"expected {vision_config_id!r}."
            )
            return 2

        suite_state = SuiteState(
            search_space_id=search_space_id,
-            agent_llm_id=agent_llm_id,
+            chat_model_id=chat_model_id,
            provider_model=provider_model,
            created_at=utc_iso_timestamp(),
            ingestion_maps=existing.ingestion_maps if existing else {},
            scenario=scenario,
-            vision_llm_config_id=vision_config_id,
+            vision_model_id=vision_config_id,
            vision_provider_model=vision_provider_model,
            native_arm_model=native_arm_model,
        )
@ -342,7 +345,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
        f"suite={suite!r}",
        f"scenario={scenario!r}",
        f"search_space_id={suite_state.search_space_id}",
-        f"agent_llm_id={suite_state.agent_llm_id}",
+        f"chat_model_id={suite_state.chat_model_id}",
        f"provider_model={suite_state.provider_model!r}",
    ]
    if suite_state.vision_provider_model:
@ -353,14 +356,6 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
    return 0


-def _validate_pin(prefs: LlmPreferences, provider_model: str) -> bool:
-    agent = prefs.agent_llm or {}
-    return (
-        str(agent.get("provider", "")).upper() == "OPENROUTER"
-        and str(agent.get("model_name", "")) == provider_model
-    )
-
-
 async def _cmd_teardown(args: argparse.Namespace) -> int:
    suite = args.suite
    config = load_config()
@ -654,10 +649,10 @@ def _build_parser() -> argparse.ArgumentParser:
        ),
    )
    p_setup.add_argument(
-        "--agent-llm-id",
+        "--chat-model-id",
        type=int,
        default=None,
-        help="Optional override for BYOK NewLLMConfig rows.",
+        help="Optional explicit model id override.",
    )
    p_setup.add_argument(
        "--scenario",
--- a/surfsense_evals/src/surfsense_evals/core/clients/search_space.py
+++ b/surfsense_evals/src/surfsense_evals/core/clients/search_space.py
@ -1,17 +1,16 @@
-"""Client for ``/api/v1/searchspaces`` and ``/api/v1/search-spaces/{id}/llm-preferences``.
+"""Client for ``/api/v1/searchspaces`` and model-role endpoints.

 Verified against:

 * ``surfsense_backend/app/routes/search_spaces_routes.py:116`` (POST create)
 * ``surfsense_backend/app/routes/search_spaces_routes.py:234`` (GET by id)
 * ``surfsense_backend/app/routes/search_spaces_routes.py:422`` (DELETE soft-delete)
-* ``surfsense_backend/app/routes/search_spaces_routes.py:698-849`` (GET/PUT llm-preferences)
+* ``surfsense_backend/app/routes/model_connections_routes.py`` (GET/PUT model roles)
 * ``surfsense_backend/app/schemas/search_space.py:14`` (SearchSpaceCreate body)
-* ``surfsense_backend/app/routes/vision_llm_routes.py:60`` (GET global vision configs)

 Note the inconsistent pluralisation in the backend: ``/searchspaces``
-(no hyphen) for CRUD, but ``/search-spaces`` (hyphenated) for the
-``llm-preferences`` sub-resource. Both are mirrored verbatim here.
+(no hyphen) for CRUD, but ``/search-spaces`` (hyphenated) for model-role
+sub-resources. Both are mirrored verbatim here.
 """

 from __future__ import annotations
@ -46,13 +45,8 @@ class SearchSpaceRow:


@dataclass
-class VisionLlmConfigEntry:
-    """Subset of one ``GET /global-vision-llm-configs`` row.
-
-    The backend returns negative ids for global / OpenRouter-derived
-    vision configs and positive ids for per-user BYOK rows. Either is
-    accepted by ``set_llm_preferences(vision_llm_config_id=...)``.
-    """
+class VisionModelEntry:
+    """Subset of one GLOBAL model-connection model with image input support."""

    id: int
    name: str
@ -62,45 +56,38 @@ class VisionLlmConfigEntry:
    raw: dict[str, Any]

    @classmethod
-    def from_payload(cls, payload: dict[str, Any]) -> VisionLlmConfigEntry:
+    def from_payload(cls, payload: dict[str, Any]) -> VisionModelEntry:
        return cls(
            id=int(payload.get("id", 0)),
-            name=str(payload.get("name", "")),
+            name=str(payload.get("display_name") or payload.get("model_id") or ""),
            provider=str(payload.get("provider", "")).upper(),
-            model_name=str(payload.get("model_name", "")),
-            is_auto_mode=bool(payload.get("is_auto_mode", False)),
+            model_name=str(payload.get("model_id", "")),
+            is_auto_mode=False,
            raw=payload,
        )


@dataclass
-class LlmPreferences:
-    """Resolved LLM preferences with the embedded full config row.
+class ModelRoles:
+    """Model role ids for a search space."""

-    Mirrors ``LLMPreferencesRead`` from the backend so the lifecycle
-    command can introspect ``provider`` / ``model_name`` to validate the
-    OpenRouter pin.
-    """
-
-    agent_llm_id: int | None
-    image_generation_config_id: int | None
-    vision_llm_config_id: int | None
-    agent_llm: dict[str, Any] | None
+    chat_model_id: int | None
+    image_gen_model_id: int | None
+    vision_model_id: int | None
    raw: dict[str, Any]

    @classmethod
-    def from_payload(cls, payload: dict[str, Any]) -> LlmPreferences:
+    def from_payload(cls, payload: dict[str, Any]) -> ModelRoles:
        return cls(
-            agent_llm_id=payload.get("agent_llm_id"),
-            image_generation_config_id=payload.get("image_generation_config_id"),
-            vision_llm_config_id=payload.get("vision_llm_config_id"),
-            agent_llm=payload.get("agent_llm"),
+            chat_model_id=payload.get("chat_model_id"),
+            image_gen_model_id=payload.get("image_gen_model_id"),
+            vision_model_id=payload.get("vision_model_id"),
            raw=payload,
        )


 class SearchSpaceClient:
-    """Thin wrapper around the SearchSpace + LLM preferences endpoints."""
+    """Thin wrapper around the SearchSpace + model role endpoints."""

    def __init__(self, http: httpx.AsyncClient, base_url: str) -> None:
        self._http = http
@ -139,64 +126,67 @@ class SearchSpaceClient:
            return
        response.raise_for_status()

-    async def get_llm_preferences(self, search_space_id: int) -> LlmPreferences:
+    async def get_model_roles(self, search_space_id: int) -> ModelRoles:
        response = await self._http.get(
-            f"{self._base}/api/v1/search-spaces/{search_space_id}/llm-preferences",
+            f"{self._base}/api/v1/search-spaces/{search_space_id}/model-roles",
            headers={"Accept": "application/json"},
        )
        response.raise_for_status()
-        return LlmPreferences.from_payload(response.json())
+        return ModelRoles.from_payload(response.json())

-    async def set_llm_preferences(
+    async def set_model_roles(
        self,
        search_space_id: int,
        *,
-        agent_llm_id: int | None = None,
-        image_generation_config_id: int | None = None,
-        vision_llm_config_id: int | None = None,
-    ) -> LlmPreferences:
-        """PUT a partial update to ``/search-spaces/{id}/llm-preferences``.
+        chat_model_id: int | None = None,
+        image_gen_model_id: int | None = None,
+        vision_model_id: int | None = None,
+    ) -> ModelRoles:
+        """PUT a partial update to ``/search-spaces/{id}/model-roles``.

        Backend uses ``model_dump(exclude_unset=True)`` so omitted fields
        are left unchanged.
        """

        body: dict[str, Any] = {}
-        if agent_llm_id is not None:
-            body["agent_llm_id"] = agent_llm_id
-        if image_generation_config_id is not None:
-            body["image_generation_config_id"] = image_generation_config_id
-        if vision_llm_config_id is not None:
-            body["vision_llm_config_id"] = vision_llm_config_id
+        if chat_model_id is not None:
+            body["chat_model_id"] = chat_model_id
+        if image_gen_model_id is not None:
+            body["image_gen_model_id"] = image_gen_model_id
+        if vision_model_id is not None:
+            body["vision_model_id"] = vision_model_id
        response = await self._http.put(
-            f"{self._base}/api/v1/search-spaces/{search_space_id}/llm-preferences",
+            f"{self._base}/api/v1/search-spaces/{search_space_id}/model-roles",
            json=body,
            headers={"Accept": "application/json"},
        )
        response.raise_for_status()
-        return LlmPreferences.from_payload(response.json())
+        return ModelRoles.from_payload(response.json())

-    async def list_global_vision_llm_configs(self) -> list[VisionLlmConfigEntry]:
-        """List the registered global vision LLM configs.
+    async def list_global_vision_models(self) -> list[VisionModelEntry]:
+        """List registered GLOBAL models that can accept image input.

-        Used by ``setup`` to (a) resolve an explicit ``--vision-llm <slug>``
-        to a config id and (b) auto-pick the strongest registered vision
-        config when the operator doesn't pass one. The ``Auto (Fastest)``
-        entry (``id=0``) is filtered out — accuracy must be reproducible.
+        Used by ``setup`` to resolve ``--vision-llm <slug>`` or auto-pick a
+        reproducible ingest-time vision model.
        """

        response = await self._http.get(
-            f"{self._base}/api/v1/global-vision-llm-configs",
+            f"{self._base}/api/v1/model-connections/global",
            headers={"Accept": "application/json"},
        )
        response.raise_for_status()
        payload = response.json()
        if not isinstance(payload, list):
            raise RuntimeError(
-                f"Unexpected /global-vision-llm-configs payload: {payload!r}"
+                f"Unexpected /model-connections/global payload: {payload!r}"
            )
-        return [
-            VisionLlmConfigEntry.from_payload(item)
-            for item in payload
-            if not bool(item.get("is_auto_mode", False))
-        ]
+        entries: list[VisionModelEntry] = []
+        for connection in payload:
+            provider = str(connection.get("provider", ""))
+            for model in connection.get("models") or []:
+                if not model.get("enabled", True) or not model.get("supports_image_input"):
+                    continue
+                entries.append(
+                    VisionModelEntry.from_payload({**model, "provider": provider})
+                )
+        return entries
--- a/surfsense_evals/src/surfsense_evals/core/config.py
+++ b/surfsense_evals/src/surfsense_evals/core/config.py
@ -147,35 +147,35 @@ class SuiteState:
    """Per-suite persisted state.

    ``provider_model`` is the slug pinned to the SearchSpace's
-    ``agent_llm`` — what answers SurfSense queries (and what the native
+    ``chat_model_id`` — what answers SurfSense queries (and what the native
    arm uses too, unless ``native_arm_model`` is set for cost-arbitrage).

-    ``vision_provider_model`` is the slug of the OpenRouter vision LLM
-    config attached to the SearchSpace's ``vision_llm_config_id`` — what
+    ``vision_provider_model`` is the slug of the OpenRouter vision model
+    attached to the SearchSpace's ``vision_model_id`` — what
    SurfSense uses to extract image content at ingest time when
    ``use_vision_llm=True``. ``None`` means no vision config was attached
    at setup (legacy or text-only suite).
    """

    search_space_id: int
-    agent_llm_id: int
+    chat_model_id: int
    provider_model: str
    created_at: str
    ingestion_maps: dict[str, str] = field(default_factory=dict)
    scenario: str = DEFAULT_SCENARIO
-    vision_llm_config_id: int | None = None
+    vision_model_id: int | None = None
    vision_provider_model: str | None = None
    native_arm_model: str | None = None

    def to_dict(self) -> dict[str, Any]:
        return {
            "search_space_id": self.search_space_id,
-            "agent_llm_id": self.agent_llm_id,
+            "chat_model_id": self.chat_model_id,
            "provider_model": self.provider_model,
            "created_at": self.created_at,
            "ingestion_maps": dict(self.ingestion_maps),
            "scenario": self.scenario,
-            "vision_llm_config_id": self.vision_llm_config_id,
+            "vision_model_id": self.vision_model_id,
            "vision_provider_model": self.vision_provider_model,
            "native_arm_model": self.native_arm_model,
        }
@ -187,15 +187,16 @@ class SuiteState:
        scenario = str(payload.get("scenario") or DEFAULT_SCENARIO)
        if scenario not in SCENARIOS:
            scenario = DEFAULT_SCENARIO
-        raw_vision_id = payload.get("vision_llm_config_id")
+        raw_chat_id = payload.get("chat_model_id")
+        raw_vision_id = payload.get("vision_model_id")
        return cls(
            search_space_id=int(payload["search_space_id"]),
-            agent_llm_id=int(payload["agent_llm_id"]),
+            chat_model_id=int(raw_chat_id),
            provider_model=str(payload["provider_model"]),
            created_at=str(payload.get("created_at") or ""),
            ingestion_maps=dict(payload.get("ingestion_maps") or {}),
            scenario=scenario,
-            vision_llm_config_id=int(raw_vision_id) if raw_vision_id is not None else None,
+            vision_model_id=int(raw_vision_id) if raw_vision_id is not None else None,
            vision_provider_model=(
                str(payload["vision_provider_model"])
                if payload.get("vision_provider_model")
--- a/surfsense_evals/src/surfsense_evals/core/registry.py
+++ b/surfsense_evals/src/surfsense_evals/core/registry.py
@ -53,8 +53,8 @@ class RunContext:
        return self.suite_state.search_space_id

    @property
-    def agent_llm_id(self) -> int:
-        return self.suite_state.agent_llm_id
+    def chat_model_id(self) -> int:
+        return self.suite_state.chat_model_id

    @property
    def provider_model(self) -> str:
--- a/surfsense_evals/src/surfsense_evals/core/vision_llm.py
+++ b/surfsense_evals/src/surfsense_evals/core/vision_llm.py
@ -3,8 +3,8 @@
 Two responsibilities:

 1. Resolve an explicit ``--vision-llm <slug>`` to a global OpenRouter
-   vision LLM config id that ``set_llm_preferences(vision_llm_config_id=...)``
-   can accept.
+   vision-capable model id that ``set_model_roles(vision_model_id=...)`` can
+   accept.
 2. Auto-pick the strongest registered vision-capable model when the operator
   doesn't pass ``--vision-llm`` but the scenario / benchmark needs one.

--- a/surfsense_evals/src/surfsense_evals/suites/medical/medxpertqa/runner.py
+++ b/surfsense_evals/src/surfsense_evals/suites/medical/medxpertqa/runner.py
@ -371,7 +371,7 @@ class MedXpertQAMMBenchmark:
                "provider_model": ctx.provider_model,
                "native_arm_model": native_arm_model,
                "vision_provider_model": ctx.vision_provider_model,
-                "agent_llm_id": ctx.agent_llm_id,
+                "chat_model_id": ctx.chat_model_id,
                "ingest_settings": ingest_settings,
            },
        )
--- a/surfsense_evals/src/surfsense_evals/suites/multimodal_doc/mmlongbench/runner.py
+++ b/surfsense_evals/src/surfsense_evals/suites/multimodal_doc/mmlongbench/runner.py
@ -391,7 +391,7 @@ class MMLongBenchDocBenchmark:
                "provider_model": ctx.provider_model,
                "native_arm_model": native_arm_model,
                "vision_provider_model": ctx.vision_provider_model,
-                "agent_llm_id": ctx.agent_llm_id,
+                "chat_model_id": ctx.chat_model_id,
                "ingest_settings": ingest_settings,
            },
        )
--- a/surfsense_evals/src/surfsense_evals/suites/multimodal_doc/parser_compare/runner.py
+++ b/surfsense_evals/src/surfsense_evals/suites/multimodal_doc/parser_compare/runner.py
@ -554,7 +554,7 @@ class ParserCompareBenchmark:
                "scenario": ctx.scenario,
                "provider_model": ctx.provider_model,
                "vision_provider_model": ctx.vision_provider_model,
-                "agent_llm_id": ctx.agent_llm_id,
+                "chat_model_id": ctx.chat_model_id,
                "preprocess_tariff": {
                    "basic_per_1k_pages": 1.0,
                    "premium_per_1k_pages": 10.0,
--- a/surfsense_evals/src/surfsense_evals/suites/research/crag/runner.py
+++ b/surfsense_evals/src/surfsense_evals/suites/research/crag/runner.py
@ -467,7 +467,7 @@ class CragBenchmark:
                "provider_model": ctx.provider_model,
                "native_arm_model": ctx.native_arm_model,
                "vision_provider_model": ctx.vision_provider_model,
-                "agent_llm_id": ctx.agent_llm_id,
+                "chat_model_id": ctx.chat_model_id,
                "ingest_settings": ingest_settings,
                "per_page_char_cap": per_page_char_cap,
                "max_output_tokens": max_output_tokens,
--- a/surfsense_evals/src/surfsense_evals/suites/research/frames/runner.py
+++ b/surfsense_evals/src/surfsense_evals/suites/research/frames/runner.py
@ -372,7 +372,7 @@ class FramesBenchmark:
                "provider_model": ctx.provider_model,
                "native_arm_model": ctx.native_arm_model,
                "vision_provider_model": ctx.vision_provider_model,
-                "agent_llm_id": ctx.agent_llm_id,
+                "chat_model_id": ctx.chat_model_id,
                "ingest_settings": ingest_settings,
                "bare_arm_label": "bare_llm",
            },
--- a/surfsense_evals/tests/core/test_clients.py
+++ b/surfsense_evals/tests/core/test_clients.py
@ -63,29 +63,22 @@ async def test_delete_search_space_idempotent_on_404(respx_mock, http):

@pytest.mark.asyncio
@respx.mock(base_url=_BASE)
-async def test_set_llm_preferences_partial_update(respx_mock, http):
-    route = respx_mock.put("/api/v1/search-spaces/42/llm-preferences").mock(
+async def test_set_model_roles_partial_update(respx_mock, http):
+    route = respx_mock.put("/api/v1/search-spaces/42/model-roles").mock(
        return_value=httpx.Response(
            200,
            json={
-                "agent_llm_id": -10042,
-                "agent_llm_id": None,
-                "image_generation_config_id": None,
-                "vision_llm_config_id": None,
-                "agent_llm": {
-                    "id": -10042,
-                    "provider": "OPENROUTER",
-                    "model_name": "anthropic/claude-sonnet-4.5",
-                },
+                "chat_model_id": -10042,
+                "image_gen_model_id": None,
+                "vision_model_id": None,
            },
        )
    )
    client = SearchSpaceClient(http, _BASE)
-    prefs = await client.set_llm_preferences(42, agent_llm_id=-10042)
-    assert prefs.agent_llm_id == -10042
-    assert prefs.agent_llm["provider"] == "OPENROUTER"
+    roles = await client.set_model_roles(42, chat_model_id=-10042)
+    assert roles.chat_model_id == -10042
    sent_body = json.loads(route.calls[-1].request.content)
-    assert sent_body == {"agent_llm_id": -10042}
+    assert sent_body == {"chat_model_id": -10042}


 # ---------------------------------------------------------------------------
--- a/surfsense_evals/tests/core/test_config.py
+++ b/surfsense_evals/tests/core/test_config.py
@ -41,14 +41,14 @@ def test_state_roundtrip_per_suite(tmp_env):  # noqa: ARG001
    assert get_suite_state(config, "medical") is None
    state = SuiteState(
        search_space_id=1,
-        agent_llm_id=-10042,
+        chat_model_id=-10042,
        provider_model="anthropic/claude-sonnet-4.5",
        created_at="2026-05-11T20-30-00Z",
    )
    set_suite_state(config, "medical", state)
    legal = SuiteState(
        search_space_id=2,
-        agent_llm_id=-1,
+        chat_model_id=-1,
        provider_model="openai/gpt-5",
        created_at="2026-05-11T21-00-00Z",
    )
@ -84,25 +84,19 @@ def test_paths_are_per_suite(tmp_env):  # noqa: ARG001
 # ---------------------------------------------------------------------------


-def test_legacy_state_back_compat_defaults_to_head_to_head():
-    """state.json files written before scenarios shipped must still load.
+def test_minimal_state_defaults_to_head_to_head():
+    """Missing scenario / vision / native fields default safely."""

-    Missing ``scenario`` / ``vision_*`` / ``native_arm_model`` keys all
-    default to ``head-to-head`` / ``None`` so old setups keep working
-    after upgrade — the runner's behaviour exactly mirrors the legacy
-    one (both arms answer with ``provider_model``).
-    """
-
-    legacy = {
+    payload = {
        "search_space_id": 7,
-        "agent_llm_id": -123,
+        "chat_model_id": -123,
        "provider_model": "anthropic/claude-sonnet-4.5",
        "created_at": "2026-05-11T20-30-00Z",
        "ingestion_maps": {},
    }
-    state = SuiteState.from_dict(legacy)
+    state = SuiteState.from_dict(payload)
    assert state.scenario == DEFAULT_SCENARIO == "head-to-head"
-    assert state.vision_llm_config_id is None
+    assert state.vision_model_id is None
    assert state.vision_provider_model is None
    assert state.native_arm_model is None
    # The native arm should still answer with the same slug as SurfSense.
@ -118,7 +112,7 @@ def test_unknown_scenario_falls_back_to_default():

    payload = {
        "search_space_id": 1,
-        "agent_llm_id": -1,
+        "chat_model_id": -1,
        "provider_model": "openai/gpt-5",
        "scenario": "unknown-scenario-name",
    }
@ -130,11 +124,11 @@ def test_cost_arbitrage_state_persists_native_arm_model(tmp_env):  # noqa: ARG00
    config = load_config()
    state = SuiteState(
        search_space_id=42,
-        agent_llm_id=-1,
+        chat_model_id=-1,
        provider_model="openai/gpt-5.4-mini",
        created_at="2026-05-11T20-30-00Z",
        scenario="cost-arbitrage",
-        vision_llm_config_id=-101,
+        vision_model_id=-101,
        vision_provider_model="anthropic/claude-sonnet-4.5",
        native_arm_model="anthropic/claude-sonnet-4.5",
    )
@ -142,7 +136,7 @@ def test_cost_arbitrage_state_persists_native_arm_model(tmp_env):  # noqa: ARG00

    fetched = get_suite_state(config, "medical")
    assert fetched.scenario == "cost-arbitrage"
-    assert fetched.vision_llm_config_id == -101
+    assert fetched.vision_model_id == -101
    assert fetched.vision_provider_model == "anthropic/claude-sonnet-4.5"
    assert fetched.native_arm_model == "anthropic/claude-sonnet-4.5"
    # Cost arbitrage's whole point: native arm slug != surfsense slug.
--- a/surfsense_evals/tests/test_integration_smoke.py
+++ b/surfsense_evals/tests/test_integration_smoke.py
@ -27,7 +27,7 @@ async def test_smoke_against_localhost():
        pytest.skip("No credentials in environment; skipping integration smoke")
    bundle = await acquire_token(config)
    async with client_with_auth(config, bundle) as client:
-        response = await client.get(f"{config.surfsense_api_base}/api/v1/global-new-llm-configs")
+        response = await client.get(f"{config.surfsense_api_base}/api/v1/model-connections/global")
        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc: