feat(evals): Remove summary ingest settings from evals

This commit is contained in:
Anish Sarkar 2026-06-04 00:53:18 +05:30
parent e427501482
commit dc6a17930b
17 changed files with 19 additions and 59 deletions

View file

@ -110,7 +110,6 @@ class DocumentsClient:
files: Iterable[Path], files: Iterable[Path],
*, *,
search_space_id: int, search_space_id: int,
should_summarize: bool = False,
use_vision_llm: bool = False, use_vision_llm: bool = False,
processing_mode: str = "basic", processing_mode: str = "basic",
) -> FileUploadResult: ) -> FileUploadResult:
@ -149,7 +148,6 @@ class DocumentsClient:
f"{self._base}/api/v1/documents/fileupload", f"{self._base}/api/v1/documents/fileupload",
data={ data={
"search_space_id": str(search_space_id), "search_space_id": str(search_space_id),
"should_summarize": "true" if should_summarize else "false",
"use_vision_llm": "true" if use_vision_llm else "false", "use_vision_llm": "true" if use_vision_llm else "false",
"processing_mode": processing_mode, "processing_mode": processing_mode,
}, },

View file

@ -83,7 +83,6 @@ class LlmPreferences:
""" """
agent_llm_id: int | None agent_llm_id: int | None
document_summary_llm_id: int | None
image_generation_config_id: int | None image_generation_config_id: int | None
vision_llm_config_id: int | None vision_llm_config_id: int | None
agent_llm: dict[str, Any] | None agent_llm: dict[str, Any] | None
@ -93,7 +92,6 @@ class LlmPreferences:
def from_payload(cls, payload: dict[str, Any]) -> LlmPreferences: def from_payload(cls, payload: dict[str, Any]) -> LlmPreferences:
return cls( return cls(
agent_llm_id=payload.get("agent_llm_id"), agent_llm_id=payload.get("agent_llm_id"),
document_summary_llm_id=payload.get("document_summary_llm_id"),
image_generation_config_id=payload.get("image_generation_config_id"), image_generation_config_id=payload.get("image_generation_config_id"),
vision_llm_config_id=payload.get("vision_llm_config_id"), vision_llm_config_id=payload.get("vision_llm_config_id"),
agent_llm=payload.get("agent_llm"), agent_llm=payload.get("agent_llm"),
@ -154,7 +152,6 @@ class SearchSpaceClient:
search_space_id: int, search_space_id: int,
*, *,
agent_llm_id: int | None = None, agent_llm_id: int | None = None,
document_summary_llm_id: int | None = None,
image_generation_config_id: int | None = None, image_generation_config_id: int | None = None,
vision_llm_config_id: int | None = None, vision_llm_config_id: int | None = None,
) -> LlmPreferences: ) -> LlmPreferences:
@ -167,8 +164,6 @@ class SearchSpaceClient:
body: dict[str, Any] = {} body: dict[str, Any] = {}
if agent_llm_id is not None: if agent_llm_id is not None:
body["agent_llm_id"] = agent_llm_id body["agent_llm_id"] = agent_llm_id
if document_summary_llm_id is not None:
body["document_summary_llm_id"] = document_summary_llm_id
if image_generation_config_id is not None: if image_generation_config_id is not None:
body["image_generation_config_id"] = image_generation_config_id body["image_generation_config_id"] = image_generation_config_id
if vision_llm_config_id is not None: if vision_llm_config_id is not None:

View file

@ -8,15 +8,13 @@ exactly three knobs (verified at
* ``processing_mode`` ``"basic"`` (default) | ``"premium"`` * ``processing_mode`` ``"basic"`` (default) | ``"premium"``
* ``use_vision_llm`` ``bool`` (run vision LLM during ingest to * ``use_vision_llm`` ``bool`` (run vision LLM during ingest to
extract image content / captions / tables) extract image content / captions / tables)
* ``should_summarize`` ``bool`` (generate document summary)
This module gives every benchmark a uniform way to: This module gives every benchmark a uniform way to:
1. Receive sensible per-benchmark defaults (text-only benchmarks 1. Receive sensible per-benchmark defaults (text-only benchmarks
default vision off; image-bearing benchmarks default vision on). default vision off; image-bearing benchmarks default vision on).
2. Accept CLI overrides (``--use-vision-llm`` / ``--no-vision-llm``, 2. Accept CLI overrides (``--use-vision-llm`` / ``--no-vision-llm``,
``--processing-mode {basic,premium}``, ``--processing-mode {basic,premium}``).
``--should-summarize`` / ``--no-summarize``).
3. Persist the *actual* settings used into the doc-map manifest and 3. Persist the *actual* settings used into the doc-map manifest and
the run artifact so reports can show "vision=ON, mode=premium → the run artifact so reports can show "vision=ON, mode=premium →
65% accuracy" head-to-head with "vision=OFF, mode=basic 52%". 65% accuracy" head-to-head with "vision=OFF, mode=basic 52%".
@ -71,13 +69,11 @@ class IngestSettings:
use_vision_llm: bool = False use_vision_llm: bool = False
processing_mode: str = "basic" processing_mode: str = "basic"
should_summarize: bool = False
def to_dict(self) -> dict[str, Any]: def to_dict(self) -> dict[str, Any]:
return { return {
"use_vision_llm": self.use_vision_llm, "use_vision_llm": self.use_vision_llm,
"processing_mode": self.processing_mode, "processing_mode": self.processing_mode,
"should_summarize": self.should_summarize,
} }
@classmethod @classmethod
@ -87,14 +83,13 @@ class IngestSettings:
``opts`` is the kwargs dict built by ``core.cli`` from the ``opts`` is the kwargs dict built by ``core.cli`` from the
argparse namespace (see ``_cmd_ingest`` / ``_cmd_run``). Keys argparse namespace (see ``_cmd_ingest`` / ``_cmd_run``). Keys
we look for: ``use_vision_llm`` (bool or None), ``processing_mode`` we look for: ``use_vision_llm`` (bool or None), ``processing_mode``
(str or None), ``should_summarize`` (bool or None). Anything (str or None). Anything
else is ignored so benchmarks can pass through their own opts. else is ignored so benchmarks can pass through their own opts.
""" """
return cls( return cls(
use_vision_llm=_coerce_bool(opts.get("use_vision_llm"), defaults.use_vision_llm), use_vision_llm=_coerce_bool(opts.get("use_vision_llm"), defaults.use_vision_llm),
processing_mode=_coerce_mode(opts.get("processing_mode"), defaults.processing_mode), processing_mode=_coerce_mode(opts.get("processing_mode"), defaults.processing_mode),
should_summarize=_coerce_bool(opts.get("should_summarize"), defaults.should_summarize),
) )
def render_label(self) -> str: def render_label(self) -> str:
@ -102,8 +97,7 @@ class IngestSettings:
return ( return (
f"vision={'on' if self.use_vision_llm else 'off'}, " f"vision={'on' if self.use_vision_llm else 'off'}, "
f"mode={self.processing_mode}, " f"mode={self.processing_mode}"
f"summarize={'on' if self.should_summarize else 'off'}"
) )
@ -217,18 +211,6 @@ def add_ingest_settings_args(
f"Default for this benchmark: {defaults.processing_mode!r}." f"Default for this benchmark: {defaults.processing_mode!r}."
), ),
) )
_add_bool_pair(
settings_group,
dest="should_summarize",
on_flag="--should-summarize",
off_flag="--no-summarize",
on_help=(
"Have SurfSense generate a document summary at ingest "
f"(default for this benchmark: "
f"{'on' if defaults.should_summarize else 'off'})."
),
off_help="Skip per-document summary generation.",
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -292,7 +274,6 @@ def format_ingest_settings_md(settings: Any) -> str:
return "- SurfSense ingest settings: (not recorded — re-ingest to capture)" return "- SurfSense ingest settings: (not recorded — re-ingest to capture)"
vision = "on" if settings.get("use_vision_llm") else "off" vision = "on" if settings.get("use_vision_llm") else "off"
mode = settings.get("processing_mode") or "basic" mode = settings.get("processing_mode") or "basic"
summarize = "on" if settings.get("should_summarize") else "off"
return ( return (
f"- SurfSense ingest settings: vision_llm=`{vision}`, " f"- SurfSense ingest settings: vision_llm=`{vision}`, "
f"processing_mode=`{mode}`, summarize=`{summarize}`" f"processing_mode=`{mode}`, summarize=`{summarize}`"

View file

@ -160,8 +160,7 @@ async def run_ingest(
upload_result = await docs_client.upload( upload_result = await docs_client.upload(
files=[b.path for b in batches], files=[b.path for b in batches],
search_space_id=ctx.search_space_id, search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize, use_vision_llm=settings.use_vision_llm,
use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode, processing_mode=settings.processing_mode,
) )
new_doc_ids = list(upload_result.document_ids) new_doc_ids = list(upload_result.document_ids)

View file

@ -63,7 +63,6 @@ _DESCRIPTION = "CUREv1 retrieval (single-arm SurfSense): Recall@k / MRR / nDCG@1
_DEFAULT_INGEST_SETTINGS = IngestSettings( _DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=False, use_vision_llm=False,
processing_mode="basic", processing_mode="basic",
should_summarize=False,
) )

View file

@ -208,7 +208,6 @@ async def _upload_pdfs(
result = await docs_client.upload( result = await docs_client.upload(
files=batch, files=batch,
search_space_id=ctx.search_space_id, search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm, use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode, processing_mode=settings.processing_mode,
) )

View file

@ -169,7 +169,6 @@ _DESCRIPTION = (
_DEFAULT_INGEST_SETTINGS = IngestSettings( _DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=True, use_vision_llm=True,
processing_mode="basic", processing_mode="basic",
should_summarize=False,
) )

View file

@ -480,7 +480,6 @@ async def run_ingest(
upload_result = await docs_client.upload( upload_result = await docs_client.upload(
files=[b.path for b in batches], files=[b.path for b in batches],
search_space_id=ctx.search_space_id, search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm, use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode, processing_mode=settings.processing_mode,
) )

View file

@ -48,7 +48,6 @@ _DESCRIPTION = "MIRAGE (7,663 medical MCQs) — single-arm SurfSense per-task ac
_DEFAULT_INGEST_SETTINGS = IngestSettings( _DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=False, use_vision_llm=False,
processing_mode="basic", processing_mode="basic",
should_summarize=False,
) )

View file

@ -225,7 +225,6 @@ async def _upload_pdfs(
result = await docs_client.upload( result = await docs_client.upload(
files=batch, files=batch,
search_space_id=ctx.search_space_id, search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm, use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode, processing_mode=settings.processing_mode,
) )

View file

@ -178,7 +178,6 @@ _TEXT_ONLY_HINTS = ("gpt-5.4-mini", "gpt-3.5", "text-only", "instruct-")
_DEFAULT_INGEST_SETTINGS = IngestSettings( _DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=True, use_vision_llm=True,
processing_mode="basic", processing_mode="basic",
should_summarize=False,
) )

View file

@ -189,7 +189,6 @@ async def _upload_pages(
result = await docs_client.upload( result = await docs_client.upload(
files=batch, files=batch,
search_space_id=ctx.search_space_id, search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm, use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode, processing_mode=settings.processing_mode,
) )
@ -306,8 +305,7 @@ async def run_ingest(
settings = settings or IngestSettings( settings = settings or IngestSettings(
use_vision_llm=False, use_vision_llm=False,
processing_mode="basic", processing_mode="basic",
should_summarize=False, )
)
bench_dir = ctx.benchmark_data_dir() bench_dir = ctx.benchmark_data_dir()
pages_dir = bench_dir / "pages" pages_dir = bench_dir / "pages"
raw_cache = bench_dir / ".raw_cache" raw_cache = bench_dir / ".raw_cache"

View file

@ -177,7 +177,6 @@ _DESCRIPTION = (
_DEFAULT_INGEST_SETTINGS = IngestSettings( _DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=False, use_vision_llm=False,
processing_mode="basic", processing_mode="basic",
should_summarize=False,
) )

View file

@ -136,7 +136,6 @@ async def _upload_markdowns(
result = await docs_client.upload( result = await docs_client.upload(
files=batch, files=batch,
search_space_id=ctx.search_space_id, search_space_id=ctx.search_space_id,
should_summarize=settings.should_summarize,
use_vision_llm=settings.use_vision_llm, use_vision_llm=settings.use_vision_llm,
processing_mode=settings.processing_mode, processing_mode=settings.processing_mode,
) )
@ -240,8 +239,7 @@ async def run_ingest(
settings = settings or IngestSettings( settings = settings or IngestSettings(
use_vision_llm=False, use_vision_llm=False,
processing_mode="basic", processing_mode="basic",
should_summarize=False, )
)
bench_dir = ctx.benchmark_data_dir() bench_dir = ctx.benchmark_data_dir()
wiki_cache = bench_dir / "wiki" wiki_cache = bench_dir / "wiki"
wiki_cache.mkdir(parents=True, exist_ok=True) wiki_cache.mkdir(parents=True, exist_ok=True)

View file

@ -153,7 +153,6 @@ _DESCRIPTION = (
_DEFAULT_INGEST_SETTINGS = IngestSettings( _DEFAULT_INGEST_SETTINGS = IngestSettings(
use_vision_llm=False, use_vision_llm=False,
processing_mode="basic", processing_mode="basic",
should_summarize=False,
) )

View file

@ -69,7 +69,7 @@ async def test_set_llm_preferences_partial_update(respx_mock, http):
200, 200,
json={ json={
"agent_llm_id": -10042, "agent_llm_id": -10042,
"document_summary_llm_id": None, "agent_llm_id": None,
"image_generation_config_id": None, "image_generation_config_id": None,
"vision_llm_config_id": None, "vision_llm_config_id": None,
"agent_llm": { "agent_llm": {

View file

@ -40,7 +40,7 @@ from surfsense_evals.core.ingest_settings import (
class TestMerge: class TestMerge:
def test_silent_operator_uses_defaults(self) -> None: def test_silent_operator_uses_defaults(self) -> None:
defaults = IngestSettings(use_vision_llm=True, processing_mode="basic", should_summarize=True) defaults = IngestSettings(use_vision_llm=True, processing_mode="basic")
merged = IngestSettings.merge(defaults, {}) merged = IngestSettings.merge(defaults, {})
assert merged == defaults assert merged == defaults
@ -111,16 +111,16 @@ class TestMerge:
assert merged.processing_mode == "basic" assert merged.processing_mode == "basic"
def test_to_dict_round_trips(self) -> None: def test_to_dict_round_trips(self) -> None:
s = IngestSettings(use_vision_llm=True, processing_mode="premium", should_summarize=False) s = IngestSettings(use_vision_llm=True, processing_mode="premium")
d = s.to_dict() d = s.to_dict()
assert d == { assert d == {
"use_vision_llm": True, "use_vision_llm": True,
"processing_mode": "premium", "processing_mode": "premium",
"should_summarize": False, "use_vision_llm": False,
} }
def test_render_label_format(self) -> None: def test_render_label_format(self) -> None:
s = IngestSettings(use_vision_llm=True, processing_mode="premium", should_summarize=True) s = IngestSettings(use_vision_llm=True, processing_mode="premium")
assert s.render_label() == "vision=on, mode=premium, summarize=on" assert s.render_label() == "vision=on, mode=premium, summarize=on"
@ -136,7 +136,7 @@ class TestAddArgs:
add_ingest_settings_args( add_ingest_settings_args(
p, p,
defaults=IngestSettings( defaults=IngestSettings(
use_vision_llm=False, processing_mode="basic", should_summarize=False use_vision_llm=False, processing_mode="basic"
), ),
) )
return p return p
@ -145,7 +145,7 @@ class TestAddArgs:
args = parser.parse_args([]) args = parser.parse_args([])
assert args.use_vision_llm is None assert args.use_vision_llm is None
assert args.processing_mode is None assert args.processing_mode is None
assert args.should_summarize is None assert args.use_vision_llm is None
def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None: def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None:
args = parser.parse_args(["--use-vision-llm"]) args = parser.parse_args(["--use-vision-llm"])
@ -168,9 +168,9 @@ class TestAddArgs:
def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None: def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None:
on = parser.parse_args(["--should-summarize"]) on = parser.parse_args(["--should-summarize"])
assert on.should_summarize is True assert on.use_vision_llm is True
off = parser.parse_args(["--no-summarize"]) off = parser.parse_args(["--no-summarize"])
assert off.should_summarize is False assert off.use_vision_llm is False
def test_vision_flags_mutually_exclusive( def test_vision_flags_mutually_exclusive(
self, parser: argparse.ArgumentParser self, parser: argparse.ArgumentParser
@ -185,11 +185,11 @@ class TestAddArgs:
["--use-vision-llm", "--processing-mode", "premium"] ["--use-vision-llm", "--processing-mode", "premium"]
) )
defaults = IngestSettings( defaults = IngestSettings(
use_vision_llm=False, processing_mode="basic", should_summarize=False use_vision_llm=False, processing_mode="basic"
) )
merged = IngestSettings.merge(defaults, vars(args)) merged = IngestSettings.merge(defaults, vars(args))
assert merged == IngestSettings( assert merged == IngestSettings(
use_vision_llm=True, processing_mode="premium", should_summarize=False use_vision_llm=True, processing_mode="premium"
) )
@ -249,7 +249,7 @@ class TestHeader:
class TestFormatMd: class TestFormatMd:
def test_full_settings(self) -> None: def test_full_settings(self) -> None:
out = format_ingest_settings_md( out = format_ingest_settings_md(
{"use_vision_llm": True, "processing_mode": "premium", "should_summarize": True} {"use_vision_llm": True, "processing_mode": "premium", "use_vision_llm": True}
) )
assert "vision_llm=`on`" in out assert "vision_llm=`on`" in out
assert "processing_mode=`premium`" in out assert "processing_mode=`premium`" in out
@ -257,7 +257,7 @@ class TestFormatMd:
def test_default_off(self) -> None: def test_default_off(self) -> None:
out = format_ingest_settings_md( out = format_ingest_settings_md(
{"use_vision_llm": False, "processing_mode": "basic", "should_summarize": False} {"use_vision_llm": False, "processing_mode": "basic", "use_vision_llm": False}
) )
assert "vision_llm=`off`" in out assert "vision_llm=`off`" in out
assert "processing_mode=`basic`" in out assert "processing_mode=`basic`" in out