feat(evals): Remove summary ingest settings from evals

This commit is contained in:
Anish Sarkar 2026-06-04 00:53:18 +05:30
parent e427501482
commit dc6a17930b
17 changed files with 19 additions and 59 deletions

View file

@ -110,7 +110,6 @@ class DocumentsClient:
files: Iterable[Path],
*,
search_space_id: int,
should_summarize: bool = False,
use_vision_llm: bool = False,
processing_mode: str = "basic",
) -> FileUploadResult:
@ -149,7 +148,6 @@ class DocumentsClient:
f"{self._base}/api/v1/documents/fileupload",
data={
"search_space_id": str(search_space_id),
"should_summarize": "true" if should_summarize else "false",
"use_vision_llm": "true" if use_vision_llm else "false",
"processing_mode": processing_mode,
},

View file

@ -83,7 +83,6 @@ class LlmPreferences:
"""
agent_llm_id: int | None
document_summary_llm_id: int | None
image_generation_config_id: int | None
vision_llm_config_id: int | None
agent_llm: dict[str, Any] | None
@ -93,7 +92,6 @@ class LlmPreferences:
def from_payload(cls, payload: dict[str, Any]) -> LlmPreferences:
return cls(
agent_llm_id=payload.get("agent_llm_id"),
document_summary_llm_id=payload.get("document_summary_llm_id"),
image_generation_config_id=payload.get("image_generation_config_id"),
vision_llm_config_id=payload.get("vision_llm_config_id"),
agent_llm=payload.get("agent_llm"),
@ -154,7 +152,6 @@ class SearchSpaceClient:
search_space_id: int,
*,
agent_llm_id: int | None = None,
document_summary_llm_id: int | None = None,
image_generation_config_id: int | None = None,
vision_llm_config_id: int | None = None,
) -> LlmPreferences:
@ -167,8 +164,6 @@ class SearchSpaceClient:
body: dict[str, Any] = {}
if agent_llm_id is not None:
body["agent_llm_id"] = agent_llm_id
if document_summary_llm_id is not None:
body["document_summary_llm_id"] = document_summary_llm_id
if image_generation_config_id is not None:
body["image_generation_config_id"] = image_generation_config_id
if vision_llm_config_id is not None:

View file

@ -8,15 +8,13 @@ exactly three knobs (verified at
* ``processing_mode`` ``"basic"`` (default) | ``"premium"``
* ``use_vision_llm`` ``bool`` (run vision LLM during ingest to
extract image content / captions / tables)
* ``should_summarize`` ``bool`` (generate document summary)
This module gives every benchmark a uniform way to:
1. Receive sensible per-benchmark defaults (text-only benchmarks
default vision off; image-bearing benchmarks default vision on).
2. Accept CLI overrides (``--use-vision-llm`` / ``--no-vision-llm``,
``--processing-mode {basic,premium}``,
``--should-summarize`` / ``--no-summarize``).
``--processing-mode {basic,premium}``).
3. Persist the *actual* settings used into the doc-map manifest and
the run artifact so reports can show "vision=ON, mode=premium →
65% accuracy" head-to-head with "vision=OFF, mode=basic → 52%".
@ -71,13 +69,11 @@ class IngestSettings:
use_vision_llm: bool = False
processing_mode: str = "basic"
should_summarize: bool = False
def to_dict(self) -> dict[str, Any]:
return {
"use_vision_llm": self.use_vision_llm,
"processing_mode": self.processing_mode,
"should_summarize": self.should_summarize,
}
@classmethod
@ -87,14 +83,13 @@ class IngestSettings:
``opts`` is the kwargs dict built by ``core.cli`` from the
argparse namespace (see ``_cmd_ingest`` / ``_cmd_run``). Keys
we look for: ``use_vision_llm`` (bool or None), ``processing_mode``
(str or None), ``should_summarize`` (bool or None). Anything
(str or None). Anything
else is ignored so benchmarks can pass through their own opts.
"""
return cls(
use_vision_llm=_coerce_bool(opts.get("use_vision_llm"), defaults.use_vision_llm),
processing_mode=_coerce_mode(opts.get("processing_mode"), defaults.processing_mode),
should_summarize=_coerce_bool(opts.get("should_summarize"), defaults.should_summarize),
)
def render_label(self) -> str:
@ -102,8 +97,7 @@ class IngestSettings:
return (
f"vision={'on' if self.use_vision_llm else 'off'}, "
f"mode={self.processing_mode}, "
f"summarize={'on' if self.should_summarize else 'off'}"
f"mode={self.processing_mode}"
)
@ -217,18 +211,6 @@ def add_ingest_settings_args(
f"Default for this benchmark: {defaults.processing_mode!r}."
),
)
_add_bool_pair(
settings_group,
dest="should_summarize",
on_flag="--should-summarize",
off_flag="--no-summarize",
on_help=(
"Have SurfSense generate a document summary at ingest "
f"(default for this benchmark: "
f"{'on' if defaults.should_summarize else 'off'})."
),
off_help="Skip per-document summary generation.",
)
# ---------------------------------------------------------------------------
@ -292,7 +274,6 @@ def format_ingest_settings_md(settings: Any) -> str:
return "- SurfSense ingest settings: (not recorded — re-ingest to capture)"
vision = "on" if settings.get("use_vision_llm") else "off"
mode = settings.get("processing_mode") or "basic"
summarize = "on" if settings.get("should_summarize") else "off"
return (
f"- SurfSense ingest settings: vision_llm=`{vision}`, "
f"processing_mode=`{mode}`, summarize=`{summarize}`"

View file

@ -160,8 +160,7 @@ async def run_ingest(
upload_result = await docs_client.upload(
files=[b.path for b in batches],
search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm,
use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode,
)
new_doc_ids = list(upload_result.document_ids)

View file

@ -63,7 +63,6 @@ _DESCRIPTION = "CUREv1 retrieval (single-arm SurfSense): Recall@k / MRR / nDCG@1
_DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=False,
processing_mode="basic",
should_summarize=False,
)

View file

@ -208,7 +208,6 @@ async def _upload_pdfs(
result = await docs_client.upload(
files=batch,
search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode,
)

View file

@ -169,7 +169,6 @@ _DESCRIPTION = (
_DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=True,
processing_mode="basic",
should_summarize=False,
)

View file

@ -480,7 +480,6 @@ async def run_ingest(
upload_result = await docs_client.upload(
files=[b.path for b in batches],
search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode,
)

View file

@ -48,7 +48,6 @@ _DESCRIPTION = "MIRAGE (7,663 medical MCQs) — single-arm SurfSense per-task ac
_DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=False,
processing_mode="basic",
should_summarize=False,
)

View file

@ -225,7 +225,6 @@ async def _upload_pdfs(
result = await docs_client.upload(
files=batch,
search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode,
)

View file

@ -178,7 +178,6 @@ _TEXT_ONLY_HINTS = ("gpt-5.4-mini", "gpt-3.5", "text-only", "instruct-")
_DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=True,
processing_mode="basic",
should_summarize=False,
)

View file

@ -189,7 +189,6 @@ async def _upload_pages(
result = await docs_client.upload(
files=batch,
search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode,
)
@ -306,8 +305,7 @@ async def run_ingest(
settings = settings or IngestSettings(
use_vision_llm=False,
processing_mode="basic",
should_summarize=False,
)
)
bench_dir = ctx.benchmark_data_dir()
pages_dir = bench_dir / "pages"
raw_cache = bench_dir / ".raw_cache"

View file

@ -177,7 +177,6 @@ _DESCRIPTION = (
_DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=False,
processing_mode="basic",
should_summarize=False,
)

View file

@ -136,7 +136,6 @@ async def _upload_markdowns(
result = await docs_client.upload(
files=batch,
search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode,
)
@ -240,8 +239,7 @@ async def run_ingest(
settings = settings or IngestSettings(
use_vision_llm=False,
processing_mode="basic",
should_summarize=False,
)
)
bench_dir = ctx.benchmark_data_dir()
wiki_cache = bench_dir / "wiki"
wiki_cache.mkdir(parents=True, exist_ok=True)

View file

@ -153,7 +153,6 @@ _DESCRIPTION = (
_DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=False,
processing_mode="basic",
should_summarize=False,
)

View file

@ -69,7 +69,7 @@ async def test_set_llm_preferences_partial_update(respx_mock, http):
200,
json={
"agent_llm_id": -10042,
"document_summary_llm_id": None,
"agent_llm_id": None,
"image_generation_config_id": None,
"vision_llm_config_id": None,
"agent_llm": {

View file

@ -40,7 +40,7 @@ from surfsense_evals.core.ingest_settings import (
class TestMerge:
def test_silent_operator_uses_defaults(self) -> None:
defaults = IngestSettings(use_vision_llm=True, processing_mode="basic", should_summarize=True)
defaults = IngestSettings(use_vision_llm=True, processing_mode="basic")
merged = IngestSettings.merge(defaults, {})
assert merged == defaults
@ -111,16 +111,16 @@ class TestMerge:
assert merged.processing_mode == "basic"
def test_to_dict_round_trips(self) -> None:
s = IngestSettings(use_vision_llm=True, processing_mode="premium", should_summarize=False)
s = IngestSettings(use_vision_llm=True, processing_mode="premium")
d = s.to_dict()
assert d == {
"use_vision_llm": True,
"processing_mode": "premium",
"should_summarize": False,
"use_vision_llm": False,
}
def test_render_label_format(self) -> None:
s = IngestSettings(use_vision_llm=True, processing_mode="premium", should_summarize=True)
s = IngestSettings(use_vision_llm=True, processing_mode="premium")
assert s.render_label() == "vision=on, mode=premium, summarize=on"
@ -136,7 +136,7 @@ class TestAddArgs:
add_ingest_settings_args(
p,
defaults=IngestSettings(
use_vision_llm=False, processing_mode="basic", should_summarize=False
use_vision_llm=False, processing_mode="basic"
),
)
return p
@ -145,7 +145,7 @@ class TestAddArgs:
args = parser.parse_args([])
assert args.use_vision_llm is None
assert args.processing_mode is None
assert args.should_summarize is None
assert args.use_vision_llm is None
def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None:
args = parser.parse_args(["--use-vision-llm"])
@ -168,9 +168,9 @@ class TestAddArgs:
def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None:
on = parser.parse_args(["--should-summarize"])
assert on.should_summarize is True
assert on.use_vision_llm is True
off = parser.parse_args(["--no-summarize"])
assert off.should_summarize is False
assert off.use_vision_llm is False
def test_vision_flags_mutually_exclusive(
self, parser: argparse.ArgumentParser
@ -185,11 +185,11 @@ class TestAddArgs:
["--use-vision-llm", "--processing-mode", "premium"]
)
defaults = IngestSettings(
use_vision_llm=False, processing_mode="basic", should_summarize=False
use_vision_llm=False, processing_mode="basic"
)
merged = IngestSettings.merge(defaults, vars(args))
assert merged == IngestSettings(
use_vision_llm=True, processing_mode="premium", should_summarize=False
use_vision_llm=True, processing_mode="premium"
)
@ -249,7 +249,7 @@ class TestHeader:
class TestFormatMd:
def test_full_settings(self) -> None:
out = format_ingest_settings_md(
{"use_vision_llm": True, "processing_mode": "premium", "should_summarize": True}
{"use_vision_llm": True, "processing_mode": "premium", "use_vision_llm": True}
)
assert "vision_llm=`on`" in out
assert "processing_mode=`premium`" in out
@ -257,7 +257,7 @@ class TestFormatMd:
def test_default_off(self) -> None:
out = format_ingest_settings_md(
{"use_vision_llm": False, "processing_mode": "basic", "should_summarize": False}
{"use_vision_llm": False, "processing_mode": "basic", "use_vision_llm": False}
)
assert "vision_llm=`off`" in out
assert "processing_mode=`basic`" in out