mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-16 21:05:20 +02:00
feat(database-migrations): add migration to remove legacy model config tables and remove stale model connection code
This commit is contained in:
parent
50668775f8
commit
bd4a04f2e7
93 changed files with 956 additions and 11442 deletions
|
|
@ -77,7 +77,7 @@ The walkthrough above is `--scenario head-to-head` (default): both arms answer w
|
|||
| `symmetric-cheap` | `--provider-model` (cheap, text-only) | `--provider-model` (same) | Does pre-extracted image context let a non-vision LLM reason over image-heavy docs? |
|
||||
| `cost-arbitrage` | `--native-arm-model` (vision) | `--provider-model` (cheap) | How close does SurfSense get to a vision-native baseline at a fraction of per-query cost?|
|
||||
|
||||
In all three modes the **ingest-time** vision LLM is set on the SearchSpace's `vision_llm_config_id` (auto-picked from the strongest registered global OpenRouter vision config — `claude-sonnet-4.5` > `claude-opus-4.7` > `gpt-5` > `gemini-2.5-pro`, override with `--vision-llm <slug>`). What changes is which slug the *answering* models hit per arm.
|
||||
In all three modes the **ingest-time** vision LLM is set on the SearchSpace's `vision_model_id` (auto-picked from the strongest registered global OpenRouter vision-capable model — `claude-sonnet-4.5` > `claude-opus-4.7` > `gpt-5` > `gemini-2.5-pro`, override with `--vision-llm <slug>`). What changes is which slug the *answering* models hit per arm.
|
||||
|
||||
### Ingest with vision, evaluate with a non-vision LLM (`symmetric-cheap`)
|
||||
|
||||
|
|
@ -118,7 +118,7 @@ python -m surfsense_evals report --suite medical
|
|||
|
||||
Notes:
|
||||
- `cost-arbitrage` requires both `--provider-model` (the cheap SurfSense slug) AND `--native-arm-model <vision slug>`.
|
||||
- `--vision-llm <slug>` is optional; if omitted the harness queries `GET /api/v1/global-vision-llm-configs` and auto-picks the strongest registered one. Pass `--no-vision-llm-setup` if you want to keep whatever vision config is already attached to the SearchSpace.
|
||||
- `--vision-llm <slug>` is optional; if omitted the harness queries `GET /api/v1/model-connections/global` and auto-picks the strongest registered vision-capable model. Pass `--no-vision-llm-setup` if you want to keep whatever vision model is already attached to the SearchSpace.
|
||||
- The runner's "looks text-only" warning is suppressed (or relabelled as informational) for `symmetric-cheap` so intentional asymmetry doesn't read as a misconfiguration.
|
||||
- All four scenario fields (`scenario`, `provider_model`, `native_arm_model`, `vision_provider_model`) are persisted to `state.json` and recorded in `run_artifact.extra` + the report header — no need to retrace what was set.
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
"llamacloud_premium_lc",
|
||||
"surfsense_agentic"
|
||||
],
|
||||
"agent_llm_id": -5138454,
|
||||
"chat_model_id": -5138454,
|
||||
"concurrency": 2,
|
||||
"llm_model": "anthropic/claude-sonnet-4.5",
|
||||
"n_pdfs": 30,
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
Subcommands:
|
||||
|
||||
* ``setup --suite <name> --provider-model <slug> [--agent-llm-id <int>]``
|
||||
* ``setup --suite <name> --provider-model <slug> [--chat-model-id <int>]``
|
||||
* ``teardown --suite <name>``
|
||||
* ``models list [--provider openrouter] [--grep <s>]``
|
||||
* ``suites list``
|
||||
|
|
@ -18,7 +18,7 @@ publish its own flags.
|
|||
|
||||
Design choices worth flagging:
|
||||
|
||||
* ``setup`` rejects ``agent_llm_id == 0`` (Auto / LiteLLM router) so
|
||||
* ``setup`` rejects ``chat_model_id == 0`` (Auto / LiteLLM router) so
|
||||
per-question accuracy is reproducible.
|
||||
* ``setup`` validates that the picked LLM config has
|
||||
``provider == "OPENROUTER"`` and ``model_name == --provider-model``
|
||||
|
|
@ -59,7 +59,6 @@ if sys.platform == "win32":
|
|||
from . import registry
|
||||
from .auth import CredentialError, acquire_token, client_with_auth
|
||||
from .clients import SearchSpaceClient
|
||||
from .clients.search_space import LlmPreferences
|
||||
from .config import (
|
||||
DEFAULT_SCENARIO,
|
||||
SCENARIOS,
|
||||
|
|
@ -111,23 +110,30 @@ class LlmConfigEntry:
|
|||
def from_payload(cls, payload: dict[str, Any]) -> LlmConfigEntry:
|
||||
return cls(
|
||||
id=int(payload["id"]),
|
||||
name=str(payload.get("name", "")),
|
||||
name=str(payload.get("display_name") or payload.get("name") or ""),
|
||||
provider=str(payload.get("provider", "")).upper(),
|
||||
model_name=str(payload.get("model_name", "")),
|
||||
model_name=str(payload.get("model_id") or payload.get("model_name") or ""),
|
||||
raw=payload,
|
||||
)
|
||||
|
||||
|
||||
async def _list_global_llm_configs(http: httpx.AsyncClient, base: str) -> list[LlmConfigEntry]:
|
||||
response = await http.get(
|
||||
f"{base}/api/v1/global-new-llm-configs",
|
||||
f"{base}/api/v1/model-connections/global",
|
||||
headers={"Accept": "application/json"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
if not isinstance(payload, list):
|
||||
raise RuntimeError(f"Unexpected /global-new-llm-configs payload: {payload!r}")
|
||||
return [LlmConfigEntry.from_payload(item) for item in payload]
|
||||
raise RuntimeError(f"Unexpected /model-connections/global payload: {payload!r}")
|
||||
entries: list[LlmConfigEntry] = []
|
||||
for connection in payload:
|
||||
provider = connection.get("provider", "")
|
||||
for model in connection.get("models") or []:
|
||||
if not model.get("enabled", True) or not model.get("supports_chat"):
|
||||
continue
|
||||
entries.append(LlmConfigEntry.from_payload({**model, "provider": provider}))
|
||||
return entries
|
||||
|
||||
|
||||
def _resolve_openrouter_id(
|
||||
|
|
@ -143,8 +149,8 @@ def _resolve_openrouter_id(
|
|||
* If ``explicit_id`` is given: return it directly. The caller is
|
||||
then expected to GET-validate that the row's
|
||||
``provider == "OPENROUTER"`` and ``model_name`` matches the slug.
|
||||
That branch supports positive BYOK ``NewLLMConfig`` rows whose
|
||||
slugs may overlap with global OpenRouter virtuals.
|
||||
That branch supports positive BYOK model rows whose slugs may overlap
|
||||
with global OpenRouter virtuals.
|
||||
* Otherwise: filter to ``provider == "OPENROUTER"`` and
|
||||
``model_name == provider_model``. Expect exactly one match —
|
||||
raise with a friendly message otherwise.
|
||||
|
|
@ -173,7 +179,7 @@ def _resolve_openrouter_id(
|
|||
listing = "\n".join(f" id={c.id} name={c.name!r}" for c in matches)
|
||||
raise RuntimeError(
|
||||
f"Multiple OpenRouter configs for slug '{provider_model}':\n{listing}\n"
|
||||
"Pass --agent-llm-id <id> to disambiguate."
|
||||
"Pass --chat-model-id <id> to disambiguate."
|
||||
)
|
||||
return matches[0].id
|
||||
|
||||
|
|
@ -186,7 +192,7 @@ def _resolve_openrouter_id(
|
|||
async def _cmd_setup(args: argparse.Namespace) -> int:
|
||||
suite = args.suite
|
||||
provider_model: str = args.provider_model
|
||||
explicit_id: int | None = args.agent_llm_id
|
||||
explicit_id: int | None = args.chat_model_id
|
||||
scenario: str = args.scenario
|
||||
vision_llm_slug: str | None = args.vision_llm
|
||||
native_arm_model: str | None = args.native_arm_model
|
||||
|
|
@ -194,7 +200,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
|
|||
|
||||
if explicit_id == 0:
|
||||
console.print(
|
||||
"[red]agent_llm_id == 0 (Auto / LiteLLM router) is not allowed — "
|
||||
"[red]chat_model_id == 0 (Auto / LiteLLM router) is not allowed — "
|
||||
"results would not be reproducible.[/red]"
|
||||
)
|
||||
return 2
|
||||
|
|
@ -242,7 +248,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
|
|||
candidates = await _list_global_llm_configs(http, config.surfsense_api_base)
|
||||
|
||||
try:
|
||||
agent_llm_id = _resolve_openrouter_id(
|
||||
chat_model_id = _resolve_openrouter_id(
|
||||
candidates, provider_model, explicit_id=explicit_id
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
|
|
@ -288,7 +294,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
|
|||
vision_provider_model: str | None = None
|
||||
if not skip_vision_setup and (vision_required or vision_llm_slug is not None):
|
||||
try:
|
||||
vision_candidates = await ss_client.list_global_vision_llm_configs()
|
||||
vision_candidates = await ss_client.list_global_vision_models()
|
||||
resolved = resolve_vision_llm(
|
||||
vision_candidates, explicit_slug=vision_llm_slug
|
||||
)
|
||||
|
|
@ -302,37 +308,34 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
|
|||
f"(id={vision_config_id}, selected_via={resolved.selected_via})."
|
||||
)
|
||||
|
||||
pref_kwargs: dict[str, Any] = {"agent_llm_id": agent_llm_id}
|
||||
role_kwargs: dict[str, Any] = {"chat_model_id": chat_model_id}
|
||||
if vision_config_id is not None:
|
||||
pref_kwargs["vision_llm_config_id"] = vision_config_id
|
||||
role_kwargs["vision_model_id"] = vision_config_id
|
||||
|
||||
await ss_client.set_llm_preferences(search_space_id, **pref_kwargs)
|
||||
prefs = await ss_client.get_llm_preferences(search_space_id)
|
||||
if not _validate_pin(prefs, provider_model):
|
||||
agent = prefs.agent_llm or {}
|
||||
await ss_client.set_model_roles(search_space_id, **role_kwargs)
|
||||
roles = await ss_client.get_model_roles(search_space_id)
|
||||
if roles.chat_model_id != chat_model_id:
|
||||
console.print(
|
||||
f"[red]LLM pin validation FAILED.[/red] After PUT, "
|
||||
f"agent_llm.provider={agent.get('provider')!r}, "
|
||||
f"model_name={agent.get('model_name')!r}; expected "
|
||||
f"provider=OPENROUTER, model_name={provider_model!r}."
|
||||
f"chat_model_id={roles.chat_model_id!r}; expected {chat_model_id!r}."
|
||||
)
|
||||
return 2
|
||||
if vision_config_id is not None and prefs.vision_llm_config_id != vision_config_id:
|
||||
if vision_config_id is not None and roles.vision_model_id != vision_config_id:
|
||||
console.print(
|
||||
f"[red]Vision LLM pin validation FAILED.[/red] After PUT, "
|
||||
f"vision_llm_config_id={prefs.vision_llm_config_id!r}; "
|
||||
f"vision_model_id={roles.vision_model_id!r}; "
|
||||
f"expected {vision_config_id!r}."
|
||||
)
|
||||
return 2
|
||||
|
||||
suite_state = SuiteState(
|
||||
search_space_id=search_space_id,
|
||||
agent_llm_id=agent_llm_id,
|
||||
chat_model_id=chat_model_id,
|
||||
provider_model=provider_model,
|
||||
created_at=utc_iso_timestamp(),
|
||||
ingestion_maps=existing.ingestion_maps if existing else {},
|
||||
scenario=scenario,
|
||||
vision_llm_config_id=vision_config_id,
|
||||
vision_model_id=vision_config_id,
|
||||
vision_provider_model=vision_provider_model,
|
||||
native_arm_model=native_arm_model,
|
||||
)
|
||||
|
|
@ -342,7 +345,7 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
|
|||
f"suite={suite!r}",
|
||||
f"scenario={scenario!r}",
|
||||
f"search_space_id={suite_state.search_space_id}",
|
||||
f"agent_llm_id={suite_state.agent_llm_id}",
|
||||
f"chat_model_id={suite_state.chat_model_id}",
|
||||
f"provider_model={suite_state.provider_model!r}",
|
||||
]
|
||||
if suite_state.vision_provider_model:
|
||||
|
|
@ -353,14 +356,6 @@ async def _cmd_setup(args: argparse.Namespace) -> int:
|
|||
return 0
|
||||
|
||||
|
||||
def _validate_pin(prefs: LlmPreferences, provider_model: str) -> bool:
|
||||
agent = prefs.agent_llm or {}
|
||||
return (
|
||||
str(agent.get("provider", "")).upper() == "OPENROUTER"
|
||||
and str(agent.get("model_name", "")) == provider_model
|
||||
)
|
||||
|
||||
|
||||
async def _cmd_teardown(args: argparse.Namespace) -> int:
|
||||
suite = args.suite
|
||||
config = load_config()
|
||||
|
|
@ -654,10 +649,10 @@ def _build_parser() -> argparse.ArgumentParser:
|
|||
),
|
||||
)
|
||||
p_setup.add_argument(
|
||||
"--agent-llm-id",
|
||||
"--chat-model-id",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Optional override for BYOK NewLLMConfig rows.",
|
||||
help="Optional explicit model id override.",
|
||||
)
|
||||
p_setup.add_argument(
|
||||
"--scenario",
|
||||
|
|
|
|||
|
|
@ -1,17 +1,16 @@
|
|||
"""Client for ``/api/v1/searchspaces`` and ``/api/v1/search-spaces/{id}/llm-preferences``.
|
||||
"""Client for ``/api/v1/searchspaces`` and model-role endpoints.
|
||||
|
||||
Verified against:
|
||||
|
||||
* ``surfsense_backend/app/routes/search_spaces_routes.py:116`` (POST create)
|
||||
* ``surfsense_backend/app/routes/search_spaces_routes.py:234`` (GET by id)
|
||||
* ``surfsense_backend/app/routes/search_spaces_routes.py:422`` (DELETE soft-delete)
|
||||
* ``surfsense_backend/app/routes/search_spaces_routes.py:698-849`` (GET/PUT llm-preferences)
|
||||
* ``surfsense_backend/app/routes/model_connections_routes.py`` (GET/PUT model roles)
|
||||
* ``surfsense_backend/app/schemas/search_space.py:14`` (SearchSpaceCreate body)
|
||||
* ``surfsense_backend/app/routes/vision_llm_routes.py:60`` (GET global vision configs)
|
||||
|
||||
Note the inconsistent hyphenation in the backend: ``/searchspaces``
|
||||
(no hyphen) for CRUD, but ``/search-spaces`` (hyphenated) for the
|
||||
``llm-preferences`` sub-resource. Both are mirrored verbatim here.
|
||||
(no hyphen) for CRUD, but ``/search-spaces`` (hyphenated) for model-role
|
||||
sub-resources. Both are mirrored verbatim here.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -46,13 +45,8 @@ class SearchSpaceRow:
|
|||
|
||||
|
||||
@dataclass
|
||||
class VisionLlmConfigEntry:
|
||||
"""Subset of one ``GET /global-vision-llm-configs`` row.
|
||||
|
||||
The backend returns negative ids for global / OpenRouter-derived
|
||||
vision configs and positive ids for per-user BYOK rows. Either is
|
||||
accepted by ``set_llm_preferences(vision_llm_config_id=...)``.
|
||||
"""
|
||||
class VisionModelEntry:
|
||||
"""Subset of one GLOBAL model-connection model with image input support."""
|
||||
|
||||
id: int
|
||||
name: str
|
||||
|
|
@ -62,45 +56,38 @@ class VisionLlmConfigEntry:
|
|||
raw: dict[str, Any]
|
||||
|
||||
@classmethod
|
||||
def from_payload(cls, payload: dict[str, Any]) -> VisionLlmConfigEntry:
|
||||
def from_payload(cls, payload: dict[str, Any]) -> VisionModelEntry:
|
||||
return cls(
|
||||
id=int(payload.get("id", 0)),
|
||||
name=str(payload.get("name", "")),
|
||||
name=str(payload.get("display_name") or payload.get("model_id") or ""),
|
||||
provider=str(payload.get("provider", "")).upper(),
|
||||
model_name=str(payload.get("model_name", "")),
|
||||
is_auto_mode=bool(payload.get("is_auto_mode", False)),
|
||||
model_name=str(payload.get("model_id", "")),
|
||||
is_auto_mode=False,
|
||||
raw=payload,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class LlmPreferences:
|
||||
"""Resolved LLM preferences with the embedded full config row.
|
||||
class ModelRoles:
|
||||
"""Model role ids for a search space."""
|
||||
|
||||
Mirrors ``LLMPreferencesRead`` from the backend so the lifecycle
|
||||
command can introspect ``provider`` / ``model_name`` to validate the
|
||||
OpenRouter pin.
|
||||
"""
|
||||
|
||||
agent_llm_id: int | None
|
||||
image_generation_config_id: int | None
|
||||
vision_llm_config_id: int | None
|
||||
agent_llm: dict[str, Any] | None
|
||||
chat_model_id: int | None
|
||||
image_gen_model_id: int | None
|
||||
vision_model_id: int | None
|
||||
raw: dict[str, Any]
|
||||
|
||||
@classmethod
|
||||
def from_payload(cls, payload: dict[str, Any]) -> LlmPreferences:
|
||||
def from_payload(cls, payload: dict[str, Any]) -> ModelRoles:
|
||||
return cls(
|
||||
agent_llm_id=payload.get("agent_llm_id"),
|
||||
image_generation_config_id=payload.get("image_generation_config_id"),
|
||||
vision_llm_config_id=payload.get("vision_llm_config_id"),
|
||||
agent_llm=payload.get("agent_llm"),
|
||||
chat_model_id=payload.get("chat_model_id"),
|
||||
image_gen_model_id=payload.get("image_gen_model_id"),
|
||||
vision_model_id=payload.get("vision_model_id"),
|
||||
raw=payload,
|
||||
)
|
||||
|
||||
|
||||
class SearchSpaceClient:
|
||||
"""Thin wrapper around the SearchSpace + LLM preferences endpoints."""
|
||||
"""Thin wrapper around the SearchSpace + model role endpoints."""
|
||||
|
||||
def __init__(self, http: httpx.AsyncClient, base_url: str) -> None:
|
||||
self._http = http
|
||||
|
|
@ -139,64 +126,67 @@ class SearchSpaceClient:
|
|||
return
|
||||
response.raise_for_status()
|
||||
|
||||
async def get_llm_preferences(self, search_space_id: int) -> LlmPreferences:
|
||||
async def get_model_roles(self, search_space_id: int) -> ModelRoles:
|
||||
response = await self._http.get(
|
||||
f"{self._base}/api/v1/search-spaces/{search_space_id}/llm-preferences",
|
||||
f"{self._base}/api/v1/search-spaces/{search_space_id}/model-roles",
|
||||
headers={"Accept": "application/json"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return LlmPreferences.from_payload(response.json())
|
||||
return ModelRoles.from_payload(response.json())
|
||||
|
||||
async def set_llm_preferences(
|
||||
async def set_model_roles(
|
||||
self,
|
||||
search_space_id: int,
|
||||
*,
|
||||
agent_llm_id: int | None = None,
|
||||
image_generation_config_id: int | None = None,
|
||||
vision_llm_config_id: int | None = None,
|
||||
) -> LlmPreferences:
|
||||
"""PUT a partial update to ``/search-spaces/{id}/llm-preferences``.
|
||||
chat_model_id: int | None = None,
|
||||
image_gen_model_id: int | None = None,
|
||||
vision_model_id: int | None = None,
|
||||
) -> ModelRoles:
|
||||
"""PUT a partial update to ``/search-spaces/{id}/model-roles``.
|
||||
|
||||
Backend uses ``model_dump(exclude_unset=True)`` so omitted fields
|
||||
are left unchanged.
|
||||
"""
|
||||
|
||||
body: dict[str, Any] = {}
|
||||
if agent_llm_id is not None:
|
||||
body["agent_llm_id"] = agent_llm_id
|
||||
if image_generation_config_id is not None:
|
||||
body["image_generation_config_id"] = image_generation_config_id
|
||||
if vision_llm_config_id is not None:
|
||||
body["vision_llm_config_id"] = vision_llm_config_id
|
||||
if chat_model_id is not None:
|
||||
body["chat_model_id"] = chat_model_id
|
||||
if image_gen_model_id is not None:
|
||||
body["image_gen_model_id"] = image_gen_model_id
|
||||
if vision_model_id is not None:
|
||||
body["vision_model_id"] = vision_model_id
|
||||
response = await self._http.put(
|
||||
f"{self._base}/api/v1/search-spaces/{search_space_id}/llm-preferences",
|
||||
f"{self._base}/api/v1/search-spaces/{search_space_id}/model-roles",
|
||||
json=body,
|
||||
headers={"Accept": "application/json"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return LlmPreferences.from_payload(response.json())
|
||||
return ModelRoles.from_payload(response.json())
|
||||
|
||||
async def list_global_vision_llm_configs(self) -> list[VisionLlmConfigEntry]:
|
||||
"""List the registered global vision LLM configs.
|
||||
async def list_global_vision_models(self) -> list[VisionModelEntry]:
|
||||
"""List registered GLOBAL models that can accept image input.
|
||||
|
||||
Used by ``setup`` to (a) resolve an explicit ``--vision-llm <slug>``
|
||||
to a config id and (b) auto-pick the strongest registered vision
|
||||
config when the operator doesn't pass one. The ``Auto (Fastest)``
|
||||
entry (``id=0``) is filtered out — accuracy must be reproducible.
|
||||
Used by ``setup`` to resolve ``--vision-llm <slug>`` or auto-pick a
|
||||
reproducible ingest-time vision model.
|
||||
"""
|
||||
|
||||
response = await self._http.get(
|
||||
f"{self._base}/api/v1/global-vision-llm-configs",
|
||||
f"{self._base}/api/v1/model-connections/global",
|
||||
headers={"Accept": "application/json"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
if not isinstance(payload, list):
|
||||
raise RuntimeError(
|
||||
f"Unexpected /global-vision-llm-configs payload: {payload!r}"
|
||||
f"Unexpected /model-connections/global payload: {payload!r}"
|
||||
)
|
||||
return [
|
||||
VisionLlmConfigEntry.from_payload(item)
|
||||
for item in payload
|
||||
if not bool(item.get("is_auto_mode", False))
|
||||
]
|
||||
entries: list[VisionModelEntry] = []
|
||||
for connection in payload:
|
||||
provider = str(connection.get("provider", ""))
|
||||
for model in connection.get("models") or []:
|
||||
if not model.get("enabled", True) or not model.get("supports_image_input"):
|
||||
continue
|
||||
entries.append(
|
||||
VisionModelEntry.from_payload({**model, "provider": provider})
|
||||
)
|
||||
return entries
|
||||
|
|
|
|||
|
|
@ -147,35 +147,35 @@ class SuiteState:
|
|||
"""Per-suite persisted state.
|
||||
|
||||
``provider_model`` is the slug pinned to the SearchSpace's
|
||||
``agent_llm`` — what answers SurfSense queries (and what the native
|
||||
``chat_model_id`` — what answers SurfSense queries (and what the native
|
||||
arm uses too, unless ``native_arm_model`` is set for cost-arbitrage).
|
||||
|
||||
``vision_provider_model`` is the slug of the OpenRouter vision LLM
|
||||
config attached to the SearchSpace's ``vision_llm_config_id`` — what
|
||||
``vision_provider_model`` is the slug of the OpenRouter vision model
|
||||
attached to the SearchSpace's ``vision_model_id`` — what
|
||||
SurfSense uses to extract image content at ingest time when
|
||||
``use_vision_llm=True``. ``None`` means no vision config was attached
|
||||
at setup (legacy or text-only suite).
|
||||
"""
|
||||
|
||||
search_space_id: int
|
||||
agent_llm_id: int
|
||||
chat_model_id: int
|
||||
provider_model: str
|
||||
created_at: str
|
||||
ingestion_maps: dict[str, str] = field(default_factory=dict)
|
||||
scenario: str = DEFAULT_SCENARIO
|
||||
vision_llm_config_id: int | None = None
|
||||
vision_model_id: int | None = None
|
||||
vision_provider_model: str | None = None
|
||||
native_arm_model: str | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"search_space_id": self.search_space_id,
|
||||
"agent_llm_id": self.agent_llm_id,
|
||||
"chat_model_id": self.chat_model_id,
|
||||
"provider_model": self.provider_model,
|
||||
"created_at": self.created_at,
|
||||
"ingestion_maps": dict(self.ingestion_maps),
|
||||
"scenario": self.scenario,
|
||||
"vision_llm_config_id": self.vision_llm_config_id,
|
||||
"vision_model_id": self.vision_model_id,
|
||||
"vision_provider_model": self.vision_provider_model,
|
||||
"native_arm_model": self.native_arm_model,
|
||||
}
|
||||
|
|
@ -187,15 +187,16 @@ class SuiteState:
|
|||
scenario = str(payload.get("scenario") or DEFAULT_SCENARIO)
|
||||
if scenario not in SCENARIOS:
|
||||
scenario = DEFAULT_SCENARIO
|
||||
raw_vision_id = payload.get("vision_llm_config_id")
|
||||
raw_chat_id = payload.get("chat_model_id")
|
||||
raw_vision_id = payload.get("vision_model_id")
|
||||
return cls(
|
||||
search_space_id=int(payload["search_space_id"]),
|
||||
agent_llm_id=int(payload["agent_llm_id"]),
|
||||
chat_model_id=int(raw_chat_id),
|
||||
provider_model=str(payload["provider_model"]),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
ingestion_maps=dict(payload.get("ingestion_maps") or {}),
|
||||
scenario=scenario,
|
||||
vision_llm_config_id=int(raw_vision_id) if raw_vision_id is not None else None,
|
||||
vision_model_id=int(raw_vision_id) if raw_vision_id is not None else None,
|
||||
vision_provider_model=(
|
||||
str(payload["vision_provider_model"])
|
||||
if payload.get("vision_provider_model")
|
||||
|
|
|
|||
|
|
@ -53,8 +53,8 @@ class RunContext:
|
|||
return self.suite_state.search_space_id
|
||||
|
||||
@property
|
||||
def agent_llm_id(self) -> int:
|
||||
return self.suite_state.agent_llm_id
|
||||
def chat_model_id(self) -> int:
|
||||
return self.suite_state.chat_model_id
|
||||
|
||||
@property
|
||||
def provider_model(self) -> str:
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@
|
|||
Two responsibilities:
|
||||
|
||||
1. Resolve an explicit ``--vision-llm <slug>`` to a global OpenRouter
|
||||
vision LLM config id that ``set_llm_preferences(vision_llm_config_id=...)``
|
||||
can accept.
|
||||
vision-capable model id that ``set_model_roles(vision_model_id=...)`` can
|
||||
accept.
|
||||
2. Auto-pick the strongest registered vision config when the operator
|
||||
doesn't pass ``--vision-llm`` but the scenario / benchmark needs one.
|
||||
|
||||
|
|
|
|||
|
|
@ -371,7 +371,7 @@ class MedXpertQAMMBenchmark:
|
|||
"provider_model": ctx.provider_model,
|
||||
"native_arm_model": native_arm_model,
|
||||
"vision_provider_model": ctx.vision_provider_model,
|
||||
"agent_llm_id": ctx.agent_llm_id,
|
||||
"chat_model_id": ctx.chat_model_id,
|
||||
"ingest_settings": ingest_settings,
|
||||
},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -391,7 +391,7 @@ class MMLongBenchDocBenchmark:
|
|||
"provider_model": ctx.provider_model,
|
||||
"native_arm_model": native_arm_model,
|
||||
"vision_provider_model": ctx.vision_provider_model,
|
||||
"agent_llm_id": ctx.agent_llm_id,
|
||||
"chat_model_id": ctx.chat_model_id,
|
||||
"ingest_settings": ingest_settings,
|
||||
},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -554,7 +554,7 @@ class ParserCompareBenchmark:
|
|||
"scenario": ctx.scenario,
|
||||
"provider_model": ctx.provider_model,
|
||||
"vision_provider_model": ctx.vision_provider_model,
|
||||
"agent_llm_id": ctx.agent_llm_id,
|
||||
"chat_model_id": ctx.chat_model_id,
|
||||
"preprocess_tariff": {
|
||||
"basic_per_1k_pages": 1.0,
|
||||
"premium_per_1k_pages": 10.0,
|
||||
|
|
|
|||
|
|
@ -467,7 +467,7 @@ class CragBenchmark:
|
|||
"provider_model": ctx.provider_model,
|
||||
"native_arm_model": ctx.native_arm_model,
|
||||
"vision_provider_model": ctx.vision_provider_model,
|
||||
"agent_llm_id": ctx.agent_llm_id,
|
||||
"chat_model_id": ctx.chat_model_id,
|
||||
"ingest_settings": ingest_settings,
|
||||
"per_page_char_cap": per_page_char_cap,
|
||||
"max_output_tokens": max_output_tokens,
|
||||
|
|
|
|||
|
|
@ -372,7 +372,7 @@ class FramesBenchmark:
|
|||
"provider_model": ctx.provider_model,
|
||||
"native_arm_model": ctx.native_arm_model,
|
||||
"vision_provider_model": ctx.vision_provider_model,
|
||||
"agent_llm_id": ctx.agent_llm_id,
|
||||
"chat_model_id": ctx.chat_model_id,
|
||||
"ingest_settings": ingest_settings,
|
||||
"bare_arm_label": "bare_llm",
|
||||
},
|
||||
|
|
|
|||
|
|
@ -63,29 +63,22 @@ async def test_delete_search_space_idempotent_on_404(respx_mock, http):
|
|||
|
||||
@pytest.mark.asyncio
|
||||
@respx.mock(base_url=_BASE)
|
||||
async def test_set_llm_preferences_partial_update(respx_mock, http):
|
||||
route = respx_mock.put("/api/v1/search-spaces/42/llm-preferences").mock(
|
||||
async def test_set_model_roles_partial_update(respx_mock, http):
|
||||
route = respx_mock.put("/api/v1/search-spaces/42/model-roles").mock(
|
||||
return_value=httpx.Response(
|
||||
200,
|
||||
json={
|
||||
"agent_llm_id": -10042,
|
||||
"agent_llm_id": None,
|
||||
"image_generation_config_id": None,
|
||||
"vision_llm_config_id": None,
|
||||
"agent_llm": {
|
||||
"id": -10042,
|
||||
"provider": "OPENROUTER",
|
||||
"model_name": "anthropic/claude-sonnet-4.5",
|
||||
},
|
||||
"chat_model_id": -10042,
|
||||
"image_gen_model_id": None,
|
||||
"vision_model_id": None,
|
||||
},
|
||||
)
|
||||
)
|
||||
client = SearchSpaceClient(http, _BASE)
|
||||
prefs = await client.set_llm_preferences(42, agent_llm_id=-10042)
|
||||
assert prefs.agent_llm_id == -10042
|
||||
assert prefs.agent_llm["provider"] == "OPENROUTER"
|
||||
roles = await client.set_model_roles(42, chat_model_id=-10042)
|
||||
assert roles.chat_model_id == -10042
|
||||
sent_body = json.loads(route.calls[-1].request.content)
|
||||
assert sent_body == {"agent_llm_id": -10042}
|
||||
assert sent_body == {"chat_model_id": -10042}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -41,14 +41,14 @@ def test_state_roundtrip_per_suite(tmp_env): # noqa: ARG001
|
|||
assert get_suite_state(config, "medical") is None
|
||||
state = SuiteState(
|
||||
search_space_id=1,
|
||||
agent_llm_id=-10042,
|
||||
chat_model_id=-10042,
|
||||
provider_model="anthropic/claude-sonnet-4.5",
|
||||
created_at="2026-05-11T20-30-00Z",
|
||||
)
|
||||
set_suite_state(config, "medical", state)
|
||||
legal = SuiteState(
|
||||
search_space_id=2,
|
||||
agent_llm_id=-1,
|
||||
chat_model_id=-1,
|
||||
provider_model="openai/gpt-5",
|
||||
created_at="2026-05-11T21-00-00Z",
|
||||
)
|
||||
|
|
@ -84,25 +84,19 @@ def test_paths_are_per_suite(tmp_env): # noqa: ARG001
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_legacy_state_back_compat_defaults_to_head_to_head():
|
||||
"""state.json files written before scenarios shipped must still load.
|
||||
def test_minimal_state_defaults_to_head_to_head():
|
||||
"""Missing scenario / vision / native fields default safely."""
|
||||
|
||||
Missing ``scenario`` / ``vision_*`` / ``native_arm_model`` keys all
|
||||
default to ``head-to-head`` / ``None`` so old setups keep working
|
||||
after upgrade — the runner's behaviour exactly mirrors the legacy
|
||||
one (both arms answer with ``provider_model``).
|
||||
"""
|
||||
|
||||
legacy = {
|
||||
payload = {
|
||||
"search_space_id": 7,
|
||||
"agent_llm_id": -123,
|
||||
"chat_model_id": -123,
|
||||
"provider_model": "anthropic/claude-sonnet-4.5",
|
||||
"created_at": "2026-05-11T20-30-00Z",
|
||||
"ingestion_maps": {},
|
||||
}
|
||||
state = SuiteState.from_dict(legacy)
|
||||
state = SuiteState.from_dict(payload)
|
||||
assert state.scenario == DEFAULT_SCENARIO == "head-to-head"
|
||||
assert state.vision_llm_config_id is None
|
||||
assert state.vision_model_id is None
|
||||
assert state.vision_provider_model is None
|
||||
assert state.native_arm_model is None
|
||||
# The native arm should still answer with the same slug as SurfSense.
|
||||
|
|
@ -118,7 +112,7 @@ def test_unknown_scenario_falls_back_to_default():
|
|||
|
||||
payload = {
|
||||
"search_space_id": 1,
|
||||
"agent_llm_id": -1,
|
||||
"chat_model_id": -1,
|
||||
"provider_model": "openai/gpt-5",
|
||||
"scenario": "unknown-scenario-name",
|
||||
}
|
||||
|
|
@ -130,11 +124,11 @@ def test_cost_arbitrage_state_persists_native_arm_model(tmp_env): # noqa: ARG00
|
|||
config = load_config()
|
||||
state = SuiteState(
|
||||
search_space_id=42,
|
||||
agent_llm_id=-1,
|
||||
chat_model_id=-1,
|
||||
provider_model="openai/gpt-5.4-mini",
|
||||
created_at="2026-05-11T20-30-00Z",
|
||||
scenario="cost-arbitrage",
|
||||
vision_llm_config_id=-101,
|
||||
vision_model_id=-101,
|
||||
vision_provider_model="anthropic/claude-sonnet-4.5",
|
||||
native_arm_model="anthropic/claude-sonnet-4.5",
|
||||
)
|
||||
|
|
@ -142,7 +136,7 @@ def test_cost_arbitrage_state_persists_native_arm_model(tmp_env): # noqa: ARG00
|
|||
|
||||
fetched = get_suite_state(config, "medical")
|
||||
assert fetched.scenario == "cost-arbitrage"
|
||||
assert fetched.vision_llm_config_id == -101
|
||||
assert fetched.vision_model_id == -101
|
||||
assert fetched.vision_provider_model == "anthropic/claude-sonnet-4.5"
|
||||
assert fetched.native_arm_model == "anthropic/claude-sonnet-4.5"
|
||||
# Cost arbitrage's whole point: native arm slug != surfsense slug.
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ async def test_smoke_against_localhost():
|
|||
pytest.skip("No credentials in environment; skipping integration smoke")
|
||||
bundle = await acquire_token(config)
|
||||
async with client_with_auth(config, bundle) as client:
|
||||
response = await client.get(f"{config.surfsense_api_base}/api/v1/global-new-llm-configs")
|
||||
response = await client.get(f"{config.surfsense_api_base}/api/v1/model-connections/global")
|
||||
try:
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPStatusError as exc:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue