refactor(tests): Update tests to remove summary references and adjust for embedding errors

This commit is contained in:
Anish Sarkar 2026-06-04 01:51:21 +05:30
parent e4d7b01b09
commit e588782a9b
17 changed files with 69 additions and 148 deletions

View file

@ -137,15 +137,14 @@ Notes:
- `--skip-unanswerable` (run) — drop unanswerable questions
- `--docs <a.pdf>,<b.pdf>` (run) — scope to specific docs
## Ingestion knobs (vision LLM, processing mode, summarize)
## Ingestion knobs (vision LLM, processing mode)
The harness exposes `POST /api/v1/documents/fileupload`'s three knobs on every `ingest` subcommand:
The harness exposes `POST /api/v1/documents/fileupload`'s ingest knobs on every `ingest` subcommand:
| Flag pair | Effect |
|--------------------------------------------|-----------------------------------------------------------------------------------------|
| `--use-vision-llm` / `--no-vision-llm` | Walk every embedded image in the PDF and inline image-derived text at the image's position (see below). |
| `--processing-mode {basic,premium}` | `premium` carries a 10× page multiplier and routes to a stronger ETL (e.g. LlamaCloud). |
| `--should-summarize` / `--no-summarize` | Generate a per-document summary at ingest. |
The "Default ingest" column in the benchmarks table is what runs if you don't pass any flag. Whatever was actually used is recorded as a `__settings__` header in the doc map (`data/<suite>/maps/<benchmark>_*_map.jsonl`) and as `extra.ingest_settings` in `run_artifact.json`, then surfaced in the report — no need to hunt through CLI history.

View file

@ -173,14 +173,14 @@ def add_ingest_settings_args(
*,
defaults: IngestSettings,
) -> None:
"""Attach the three ingest-settings flag pairs to ``parser``.
"""Attach ingest-settings flags to ``parser``.
Each bool exposes a mutually exclusive ``--foo`` / ``--no-foo``
pair so an operator can flip either direction without restating
every flag. Default is ``None`` so that "operator didn't pass the
flag" is distinguishable from "operator explicitly passed false"
``IngestSettings.merge`` then folds in the benchmark default
only when the operator was silent.
The vision bool exposes a mutually exclusive ``--use-vision-llm`` /
``--no-vision-llm`` pair so an operator can flip either direction without
restating every flag. Default is ``None`` so that "operator didn't pass
the flag" is distinguishable from "operator explicitly passed false";
``IngestSettings.merge`` then folds in the benchmark default only when
the operator was silent.
"""
settings_group = parser.add_argument_group(
@ -276,7 +276,7 @@ def format_ingest_settings_md(settings: Any) -> str:
mode = settings.get("processing_mode") or "basic"
return (
f"- SurfSense ingest settings: vision_llm=`{vision}`, "
f"processing_mode=`{mode}`, summarize=`{summarize}`"
f"processing_mode=`{mode}`"
)

View file

@ -4,7 +4,7 @@ Covers:
* ``IngestSettings.merge`` honours operator overrides and falls back
to per-benchmark defaults when the operator is silent.
* ``add_ingest_settings_args`` exposes the three flag pairs and
* ``add_ingest_settings_args`` exposes the ingest-settings flags, and
argparse defaults of ``None`` correctly distinguish "not passed"
from "explicitly false".
* ``settings_header_line`` / ``read_settings_header`` round-trip
@ -116,12 +116,11 @@ class TestMerge:
assert d == {
"use_vision_llm": True,
"processing_mode": "premium",
"use_vision_llm": False,
}
def test_render_label_format(self) -> None:
s = IngestSettings(use_vision_llm=True, processing_mode="premium")
assert s.render_label() == "vision=on, mode=premium, summarize=on"
assert s.render_label() == "vision=on, mode=premium"
# ---------------------------------------------------------------------------
@ -145,7 +144,6 @@ class TestAddArgs:
args = parser.parse_args([])
assert args.use_vision_llm is None
assert args.processing_mode is None
assert args.use_vision_llm is None
def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None:
args = parser.parse_args(["--use-vision-llm"])
@ -166,12 +164,6 @@ class TestAddArgs:
with pytest.raises(SystemExit):
parser.parse_args(["--processing-mode", "exotic"])
def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None:
on = parser.parse_args(["--should-summarize"])
assert on.use_vision_llm is True
off = parser.parse_args(["--no-summarize"])
assert off.use_vision_llm is False
def test_vision_flags_mutually_exclusive(
self, parser: argparse.ArgumentParser
) -> None:
@ -249,19 +241,17 @@ class TestHeader:
class TestFormatMd:
def test_full_settings(self) -> None:
out = format_ingest_settings_md(
{"use_vision_llm": True, "processing_mode": "premium", "use_vision_llm": True}
{"use_vision_llm": True, "processing_mode": "premium"}
)
assert "vision_llm=`on`" in out
assert "processing_mode=`premium`" in out
assert "summarize=`on`" in out
def test_default_off(self) -> None:
out = format_ingest_settings_md(
{"use_vision_llm": False, "processing_mode": "basic", "use_vision_llm": False}
{"use_vision_llm": False, "processing_mode": "basic"}
)
assert "vision_llm=`off`" in out
assert "processing_mode=`basic`" in out
assert "summarize=`off`" in out
def test_missing_returns_re_ingest_hint(self) -> None:
# Empty dict + None + non-mapping should all degrade gracefully.