mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
feat(evals): Remove summary ingest settings from evals
This commit is contained in:
parent
e427501482
commit
dc6a17930b
17 changed files with 19 additions and 59 deletions
|
|
@ -110,7 +110,6 @@ class DocumentsClient:
|
|||
files: Iterable[Path],
|
||||
*,
|
||||
search_space_id: int,
|
||||
should_summarize: bool = False,
|
||||
use_vision_llm: bool = False,
|
||||
processing_mode: str = "basic",
|
||||
) -> FileUploadResult:
|
||||
|
|
@ -149,7 +148,6 @@ class DocumentsClient:
|
|||
f"{self._base}/api/v1/documents/fileupload",
|
||||
data={
|
||||
"search_space_id": str(search_space_id),
|
||||
"should_summarize": "true" if should_summarize else "false",
|
||||
"use_vision_llm": "true" if use_vision_llm else "false",
|
||||
"processing_mode": processing_mode,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -83,7 +83,6 @@ class LlmPreferences:
|
|||
"""
|
||||
|
||||
agent_llm_id: int | None
|
||||
document_summary_llm_id: int | None
|
||||
image_generation_config_id: int | None
|
||||
vision_llm_config_id: int | None
|
||||
agent_llm: dict[str, Any] | None
|
||||
|
|
@ -93,7 +92,6 @@ class LlmPreferences:
|
|||
def from_payload(cls, payload: dict[str, Any]) -> LlmPreferences:
|
||||
return cls(
|
||||
agent_llm_id=payload.get("agent_llm_id"),
|
||||
document_summary_llm_id=payload.get("document_summary_llm_id"),
|
||||
image_generation_config_id=payload.get("image_generation_config_id"),
|
||||
vision_llm_config_id=payload.get("vision_llm_config_id"),
|
||||
agent_llm=payload.get("agent_llm"),
|
||||
|
|
@ -154,7 +152,6 @@ class SearchSpaceClient:
|
|||
search_space_id: int,
|
||||
*,
|
||||
agent_llm_id: int | None = None,
|
||||
document_summary_llm_id: int | None = None,
|
||||
image_generation_config_id: int | None = None,
|
||||
vision_llm_config_id: int | None = None,
|
||||
) -> LlmPreferences:
|
||||
|
|
@ -167,8 +164,6 @@ class SearchSpaceClient:
|
|||
body: dict[str, Any] = {}
|
||||
if agent_llm_id is not None:
|
||||
body["agent_llm_id"] = agent_llm_id
|
||||
if document_summary_llm_id is not None:
|
||||
body["document_summary_llm_id"] = document_summary_llm_id
|
||||
if image_generation_config_id is not None:
|
||||
body["image_generation_config_id"] = image_generation_config_id
|
||||
if vision_llm_config_id is not None:
|
||||
|
|
|
|||
|
|
@ -8,15 +8,13 @@ exactly three knobs (verified at
|
|||
* ``processing_mode`` — ``"basic"`` (default) | ``"premium"``
|
||||
* ``use_vision_llm`` — ``bool`` (run vision LLM during ingest to
|
||||
extract image content / captions / tables)
|
||||
* ``should_summarize`` — ``bool`` (generate document summary)
|
||||
|
||||
This module gives every benchmark a uniform way to:
|
||||
|
||||
1. Receive sensible per-benchmark defaults (text-only benchmarks
|
||||
default vision off; image-bearing benchmarks default vision on).
|
||||
2. Accept CLI overrides (``--use-vision-llm`` / ``--no-vision-llm``,
|
||||
``--processing-mode {basic,premium}``,
|
||||
``--should-summarize`` / ``--no-summarize``).
|
||||
``--processing-mode {basic,premium}``).
|
||||
3. Persist the *actual* settings used into the doc-map manifest and
|
||||
the run artifact so reports can show "vision=ON, mode=premium →
|
||||
65% accuracy" head-to-head with "vision=OFF, mode=basic → 52%".
|
||||
|
|
@ -71,13 +69,11 @@ class IngestSettings:
|
|||
|
||||
use_vision_llm: bool = False
|
||||
processing_mode: str = "basic"
|
||||
should_summarize: bool = False
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"use_vision_llm": self.use_vision_llm,
|
||||
"processing_mode": self.processing_mode,
|
||||
"should_summarize": self.should_summarize,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
|
@ -87,14 +83,13 @@ class IngestSettings:
|
|||
``opts`` is the kwargs dict built by ``core.cli`` from the
|
||||
argparse namespace (see ``_cmd_ingest`` / ``_cmd_run``). Keys
|
||||
we look for: ``use_vision_llm`` (bool or None), ``processing_mode``
|
||||
(str or None), ``should_summarize`` (bool or None). Anything
|
||||
(str or None). Anything
|
||||
else is ignored so benchmarks can pass through their own opts.
|
||||
"""
|
||||
|
||||
return cls(
|
||||
use_vision_llm=_coerce_bool(opts.get("use_vision_llm"), defaults.use_vision_llm),
|
||||
processing_mode=_coerce_mode(opts.get("processing_mode"), defaults.processing_mode),
|
||||
should_summarize=_coerce_bool(opts.get("should_summarize"), defaults.should_summarize),
|
||||
)
|
||||
|
||||
def render_label(self) -> str:
|
||||
|
|
@ -102,8 +97,7 @@ class IngestSettings:
|
|||
|
||||
return (
|
||||
f"vision={'on' if self.use_vision_llm else 'off'}, "
|
||||
f"mode={self.processing_mode}, "
|
||||
f"summarize={'on' if self.should_summarize else 'off'}"
|
||||
f"mode={self.processing_mode}"
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -217,18 +211,6 @@ def add_ingest_settings_args(
|
|||
f"Default for this benchmark: {defaults.processing_mode!r}."
|
||||
),
|
||||
)
|
||||
_add_bool_pair(
|
||||
settings_group,
|
||||
dest="should_summarize",
|
||||
on_flag="--should-summarize",
|
||||
off_flag="--no-summarize",
|
||||
on_help=(
|
||||
"Have SurfSense generate a document summary at ingest "
|
||||
f"(default for this benchmark: "
|
||||
f"{'on' if defaults.should_summarize else 'off'})."
|
||||
),
|
||||
off_help="Skip per-document summary generation.",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -292,7 +274,6 @@ def format_ingest_settings_md(settings: Any) -> str:
|
|||
return "- SurfSense ingest settings: (not recorded — re-ingest to capture)"
|
||||
vision = "on" if settings.get("use_vision_llm") else "off"
|
||||
mode = settings.get("processing_mode") or "basic"
|
||||
summarize = "on" if settings.get("should_summarize") else "off"
|
||||
return (
|
||||
f"- SurfSense ingest settings: vision_llm=`{vision}`, "
|
||||
f"processing_mode=`{mode}`, summarize=`{summarize}`"
|
||||
|
|
|
|||
|
|
@ -160,8 +160,7 @@ async def run_ingest(
|
|||
upload_result = await docs_client.upload(
|
||||
files=[b.path for b in batches],
|
||||
search_space_id=ctx.search_space_id,
|
||||
should_summarize=settings.should_summarize,
|
||||
use_vision_llm=settings.use_vision_llm,
|
||||
use_vision_llm=settings.use_vision_llm,
|
||||
processing_mode=settings.processing_mode,
|
||||
)
|
||||
new_doc_ids = list(upload_result.document_ids)
|
||||
|
|
|
|||
|
|
@ -63,7 +63,6 @@ _DESCRIPTION = "CUREv1 retrieval (single-arm SurfSense): Recall@k / MRR / nDCG@1
|
|||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||
use_vision_llm=False,
|
||||
processing_mode="basic",
|
||||
should_summarize=False,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -208,7 +208,6 @@ async def _upload_pdfs(
|
|||
result = await docs_client.upload(
|
||||
files=batch,
|
||||
search_space_id=ctx.search_space_id,
|
||||
should_summarize=settings.should_summarize,
|
||||
use_vision_llm=settings.use_vision_llm,
|
||||
processing_mode=settings.processing_mode,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -169,7 +169,6 @@ _DESCRIPTION = (
|
|||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||
use_vision_llm=True,
|
||||
processing_mode="basic",
|
||||
should_summarize=False,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -480,7 +480,6 @@ async def run_ingest(
|
|||
upload_result = await docs_client.upload(
|
||||
files=[b.path for b in batches],
|
||||
search_space_id=ctx.search_space_id,
|
||||
should_summarize=settings.should_summarize,
|
||||
use_vision_llm=settings.use_vision_llm,
|
||||
processing_mode=settings.processing_mode,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -48,7 +48,6 @@ _DESCRIPTION = "MIRAGE (7,663 medical MCQs) — single-arm SurfSense per-task ac
|
|||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||
use_vision_llm=False,
|
||||
processing_mode="basic",
|
||||
should_summarize=False,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -225,7 +225,6 @@ async def _upload_pdfs(
|
|||
result = await docs_client.upload(
|
||||
files=batch,
|
||||
search_space_id=ctx.search_space_id,
|
||||
should_summarize=settings.should_summarize,
|
||||
use_vision_llm=settings.use_vision_llm,
|
||||
processing_mode=settings.processing_mode,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -178,7 +178,6 @@ _TEXT_ONLY_HINTS = ("gpt-5.4-mini", "gpt-3.5", "text-only", "instruct-")
|
|||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||
use_vision_llm=True,
|
||||
processing_mode="basic",
|
||||
should_summarize=False,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -189,7 +189,6 @@ async def _upload_pages(
|
|||
result = await docs_client.upload(
|
||||
files=batch,
|
||||
search_space_id=ctx.search_space_id,
|
||||
should_summarize=settings.should_summarize,
|
||||
use_vision_llm=settings.use_vision_llm,
|
||||
processing_mode=settings.processing_mode,
|
||||
)
|
||||
|
|
@ -306,8 +305,7 @@ async def run_ingest(
|
|||
settings = settings or IngestSettings(
|
||||
use_vision_llm=False,
|
||||
processing_mode="basic",
|
||||
should_summarize=False,
|
||||
)
|
||||
)
|
||||
bench_dir = ctx.benchmark_data_dir()
|
||||
pages_dir = bench_dir / "pages"
|
||||
raw_cache = bench_dir / ".raw_cache"
|
||||
|
|
|
|||
|
|
@ -177,7 +177,6 @@ _DESCRIPTION = (
|
|||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||
use_vision_llm=False,
|
||||
processing_mode="basic",
|
||||
should_summarize=False,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -136,7 +136,6 @@ async def _upload_markdowns(
|
|||
result = await docs_client.upload(
|
||||
files=batch,
|
||||
search_space_id=ctx.search_space_id,
|
||||
should_summarize=settings.should_summarize,
|
||||
use_vision_llm=settings.use_vision_llm,
|
||||
processing_mode=settings.processing_mode,
|
||||
)
|
||||
|
|
@ -240,8 +239,7 @@ async def run_ingest(
|
|||
settings = settings or IngestSettings(
|
||||
use_vision_llm=False,
|
||||
processing_mode="basic",
|
||||
should_summarize=False,
|
||||
)
|
||||
)
|
||||
bench_dir = ctx.benchmark_data_dir()
|
||||
wiki_cache = bench_dir / "wiki"
|
||||
wiki_cache.mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
|||
|
|
@ -153,7 +153,6 @@ _DESCRIPTION = (
|
|||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||
use_vision_llm=False,
|
||||
processing_mode="basic",
|
||||
should_summarize=False,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ async def test_set_llm_preferences_partial_update(respx_mock, http):
|
|||
200,
|
||||
json={
|
||||
"agent_llm_id": -10042,
|
||||
"document_summary_llm_id": None,
|
||||
"agent_llm_id": None,
|
||||
"image_generation_config_id": None,
|
||||
"vision_llm_config_id": None,
|
||||
"agent_llm": {
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ from surfsense_evals.core.ingest_settings import (
|
|||
|
||||
class TestMerge:
|
||||
def test_silent_operator_uses_defaults(self) -> None:
|
||||
defaults = IngestSettings(use_vision_llm=True, processing_mode="basic", should_summarize=True)
|
||||
defaults = IngestSettings(use_vision_llm=True, processing_mode="basic")
|
||||
merged = IngestSettings.merge(defaults, {})
|
||||
assert merged == defaults
|
||||
|
||||
|
|
@ -111,16 +111,16 @@ class TestMerge:
|
|||
assert merged.processing_mode == "basic"
|
||||
|
||||
def test_to_dict_round_trips(self) -> None:
|
||||
s = IngestSettings(use_vision_llm=True, processing_mode="premium", should_summarize=False)
|
||||
s = IngestSettings(use_vision_llm=True, processing_mode="premium")
|
||||
d = s.to_dict()
|
||||
assert d == {
|
||||
"use_vision_llm": True,
|
||||
"processing_mode": "premium",
|
||||
"should_summarize": False,
|
||||
"use_vision_llm": False,
|
||||
}
|
||||
|
||||
def test_render_label_format(self) -> None:
|
||||
s = IngestSettings(use_vision_llm=True, processing_mode="premium", should_summarize=True)
|
||||
s = IngestSettings(use_vision_llm=True, processing_mode="premium")
|
||||
assert s.render_label() == "vision=on, mode=premium, summarize=on"
|
||||
|
||||
|
||||
|
|
@ -136,7 +136,7 @@ class TestAddArgs:
|
|||
add_ingest_settings_args(
|
||||
p,
|
||||
defaults=IngestSettings(
|
||||
use_vision_llm=False, processing_mode="basic", should_summarize=False
|
||||
use_vision_llm=False, processing_mode="basic"
|
||||
),
|
||||
)
|
||||
return p
|
||||
|
|
@ -145,7 +145,7 @@ class TestAddArgs:
|
|||
args = parser.parse_args([])
|
||||
assert args.use_vision_llm is None
|
||||
assert args.processing_mode is None
|
||||
assert args.should_summarize is None
|
||||
assert args.use_vision_llm is None
|
||||
|
||||
def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None:
|
||||
args = parser.parse_args(["--use-vision-llm"])
|
||||
|
|
@ -168,9 +168,9 @@ class TestAddArgs:
|
|||
|
||||
def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None:
|
||||
on = parser.parse_args(["--should-summarize"])
|
||||
assert on.should_summarize is True
|
||||
assert on.use_vision_llm is True
|
||||
off = parser.parse_args(["--no-summarize"])
|
||||
assert off.should_summarize is False
|
||||
assert off.use_vision_llm is False
|
||||
|
||||
def test_vision_flags_mutually_exclusive(
|
||||
self, parser: argparse.ArgumentParser
|
||||
|
|
@ -185,11 +185,11 @@ class TestAddArgs:
|
|||
["--use-vision-llm", "--processing-mode", "premium"]
|
||||
)
|
||||
defaults = IngestSettings(
|
||||
use_vision_llm=False, processing_mode="basic", should_summarize=False
|
||||
use_vision_llm=False, processing_mode="basic"
|
||||
)
|
||||
merged = IngestSettings.merge(defaults, vars(args))
|
||||
assert merged == IngestSettings(
|
||||
use_vision_llm=True, processing_mode="premium", should_summarize=False
|
||||
use_vision_llm=True, processing_mode="premium"
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -249,7 +249,7 @@ class TestHeader:
|
|||
class TestFormatMd:
|
||||
def test_full_settings(self) -> None:
|
||||
out = format_ingest_settings_md(
|
||||
{"use_vision_llm": True, "processing_mode": "premium", "should_summarize": True}
|
||||
{"use_vision_llm": True, "processing_mode": "premium", "use_vision_llm": True}
|
||||
)
|
||||
assert "vision_llm=`on`" in out
|
||||
assert "processing_mode=`premium`" in out
|
||||
|
|
@ -257,7 +257,7 @@ class TestFormatMd:
|
|||
|
||||
def test_default_off(self) -> None:
|
||||
out = format_ingest_settings_md(
|
||||
{"use_vision_llm": False, "processing_mode": "basic", "should_summarize": False}
|
||||
{"use_vision_llm": False, "processing_mode": "basic", "use_vision_llm": False}
|
||||
)
|
||||
assert "vision_llm=`off`" in out
|
||||
assert "processing_mode=`basic`" in out
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue