mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
feat(evals): Remove summary ingest settings from evals
This commit is contained in:
parent
e427501482
commit
dc6a17930b
17 changed files with 19 additions and 59 deletions
|
|
@ -110,7 +110,6 @@ class DocumentsClient:
|
||||||
files: Iterable[Path],
|
files: Iterable[Path],
|
||||||
*,
|
*,
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
should_summarize: bool = False,
|
|
||||||
use_vision_llm: bool = False,
|
use_vision_llm: bool = False,
|
||||||
processing_mode: str = "basic",
|
processing_mode: str = "basic",
|
||||||
) -> FileUploadResult:
|
) -> FileUploadResult:
|
||||||
|
|
@ -149,7 +148,6 @@ class DocumentsClient:
|
||||||
f"{self._base}/api/v1/documents/fileupload",
|
f"{self._base}/api/v1/documents/fileupload",
|
||||||
data={
|
data={
|
||||||
"search_space_id": str(search_space_id),
|
"search_space_id": str(search_space_id),
|
||||||
"should_summarize": "true" if should_summarize else "false",
|
|
||||||
"use_vision_llm": "true" if use_vision_llm else "false",
|
"use_vision_llm": "true" if use_vision_llm else "false",
|
||||||
"processing_mode": processing_mode,
|
"processing_mode": processing_mode,
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,6 @@ class LlmPreferences:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
agent_llm_id: int | None
|
agent_llm_id: int | None
|
||||||
document_summary_llm_id: int | None
|
|
||||||
image_generation_config_id: int | None
|
image_generation_config_id: int | None
|
||||||
vision_llm_config_id: int | None
|
vision_llm_config_id: int | None
|
||||||
agent_llm: dict[str, Any] | None
|
agent_llm: dict[str, Any] | None
|
||||||
|
|
@ -93,7 +92,6 @@ class LlmPreferences:
|
||||||
def from_payload(cls, payload: dict[str, Any]) -> LlmPreferences:
|
def from_payload(cls, payload: dict[str, Any]) -> LlmPreferences:
|
||||||
return cls(
|
return cls(
|
||||||
agent_llm_id=payload.get("agent_llm_id"),
|
agent_llm_id=payload.get("agent_llm_id"),
|
||||||
document_summary_llm_id=payload.get("document_summary_llm_id"),
|
|
||||||
image_generation_config_id=payload.get("image_generation_config_id"),
|
image_generation_config_id=payload.get("image_generation_config_id"),
|
||||||
vision_llm_config_id=payload.get("vision_llm_config_id"),
|
vision_llm_config_id=payload.get("vision_llm_config_id"),
|
||||||
agent_llm=payload.get("agent_llm"),
|
agent_llm=payload.get("agent_llm"),
|
||||||
|
|
@ -154,7 +152,6 @@ class SearchSpaceClient:
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
*,
|
*,
|
||||||
agent_llm_id: int | None = None,
|
agent_llm_id: int | None = None,
|
||||||
document_summary_llm_id: int | None = None,
|
|
||||||
image_generation_config_id: int | None = None,
|
image_generation_config_id: int | None = None,
|
||||||
vision_llm_config_id: int | None = None,
|
vision_llm_config_id: int | None = None,
|
||||||
) -> LlmPreferences:
|
) -> LlmPreferences:
|
||||||
|
|
@ -167,8 +164,6 @@ class SearchSpaceClient:
|
||||||
body: dict[str, Any] = {}
|
body: dict[str, Any] = {}
|
||||||
if agent_llm_id is not None:
|
if agent_llm_id is not None:
|
||||||
body["agent_llm_id"] = agent_llm_id
|
body["agent_llm_id"] = agent_llm_id
|
||||||
if document_summary_llm_id is not None:
|
|
||||||
body["document_summary_llm_id"] = document_summary_llm_id
|
|
||||||
if image_generation_config_id is not None:
|
if image_generation_config_id is not None:
|
||||||
body["image_generation_config_id"] = image_generation_config_id
|
body["image_generation_config_id"] = image_generation_config_id
|
||||||
if vision_llm_config_id is not None:
|
if vision_llm_config_id is not None:
|
||||||
|
|
|
||||||
|
|
@ -8,15 +8,13 @@ exactly three knobs (verified at
|
||||||
* ``processing_mode`` — ``"basic"`` (default) | ``"premium"``
|
* ``processing_mode`` — ``"basic"`` (default) | ``"premium"``
|
||||||
* ``use_vision_llm`` — ``bool`` (run vision LLM during ingest to
|
* ``use_vision_llm`` — ``bool`` (run vision LLM during ingest to
|
||||||
extract image content / captions / tables)
|
extract image content / captions / tables)
|
||||||
* ``should_summarize`` — ``bool`` (generate document summary)
|
|
||||||
|
|
||||||
This module gives every benchmark a uniform way to:
|
This module gives every benchmark a uniform way to:
|
||||||
|
|
||||||
1. Receive sensible per-benchmark defaults (text-only benchmarks
|
1. Receive sensible per-benchmark defaults (text-only benchmarks
|
||||||
default vision off; image-bearing benchmarks default vision on).
|
default vision off; image-bearing benchmarks default vision on).
|
||||||
2. Accept CLI overrides (``--use-vision-llm`` / ``--no-vision-llm``,
|
2. Accept CLI overrides (``--use-vision-llm`` / ``--no-vision-llm``,
|
||||||
``--processing-mode {basic,premium}``,
|
``--processing-mode {basic,premium}``).
|
||||||
``--should-summarize`` / ``--no-summarize``).
|
|
||||||
3. Persist the *actual* settings used into the doc-map manifest and
|
3. Persist the *actual* settings used into the doc-map manifest and
|
||||||
the run artifact so reports can show "vision=ON, mode=premium →
|
the run artifact so reports can show "vision=ON, mode=premium →
|
||||||
65% accuracy" head-to-head with "vision=OFF, mode=basic → 52%".
|
65% accuracy" head-to-head with "vision=OFF, mode=basic → 52%".
|
||||||
|
|
@ -71,13 +69,11 @@ class IngestSettings:
|
||||||
|
|
||||||
use_vision_llm: bool = False
|
use_vision_llm: bool = False
|
||||||
processing_mode: str = "basic"
|
processing_mode: str = "basic"
|
||||||
should_summarize: bool = False
|
|
||||||
|
|
||||||
def to_dict(self) -> dict[str, Any]:
|
def to_dict(self) -> dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
"use_vision_llm": self.use_vision_llm,
|
"use_vision_llm": self.use_vision_llm,
|
||||||
"processing_mode": self.processing_mode,
|
"processing_mode": self.processing_mode,
|
||||||
"should_summarize": self.should_summarize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
@ -87,14 +83,13 @@ class IngestSettings:
|
||||||
``opts`` is the kwargs dict built by ``core.cli`` from the
|
``opts`` is the kwargs dict built by ``core.cli`` from the
|
||||||
argparse namespace (see ``_cmd_ingest`` / ``_cmd_run``). Keys
|
argparse namespace (see ``_cmd_ingest`` / ``_cmd_run``). Keys
|
||||||
we look for: ``use_vision_llm`` (bool or None), ``processing_mode``
|
we look for: ``use_vision_llm`` (bool or None), ``processing_mode``
|
||||||
(str or None), ``should_summarize`` (bool or None). Anything
|
(str or None). Anything
|
||||||
else is ignored so benchmarks can pass through their own opts.
|
else is ignored so benchmarks can pass through their own opts.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return cls(
|
return cls(
|
||||||
use_vision_llm=_coerce_bool(opts.get("use_vision_llm"), defaults.use_vision_llm),
|
use_vision_llm=_coerce_bool(opts.get("use_vision_llm"), defaults.use_vision_llm),
|
||||||
processing_mode=_coerce_mode(opts.get("processing_mode"), defaults.processing_mode),
|
processing_mode=_coerce_mode(opts.get("processing_mode"), defaults.processing_mode),
|
||||||
should_summarize=_coerce_bool(opts.get("should_summarize"), defaults.should_summarize),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def render_label(self) -> str:
|
def render_label(self) -> str:
|
||||||
|
|
@ -102,8 +97,7 @@ class IngestSettings:
|
||||||
|
|
||||||
return (
|
return (
|
||||||
f"vision={'on' if self.use_vision_llm else 'off'}, "
|
f"vision={'on' if self.use_vision_llm else 'off'}, "
|
||||||
f"mode={self.processing_mode}, "
|
f"mode={self.processing_mode}"
|
||||||
f"summarize={'on' if self.should_summarize else 'off'}"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -217,18 +211,6 @@ def add_ingest_settings_args(
|
||||||
f"Default for this benchmark: {defaults.processing_mode!r}."
|
f"Default for this benchmark: {defaults.processing_mode!r}."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
_add_bool_pair(
|
|
||||||
settings_group,
|
|
||||||
dest="should_summarize",
|
|
||||||
on_flag="--should-summarize",
|
|
||||||
off_flag="--no-summarize",
|
|
||||||
on_help=(
|
|
||||||
"Have SurfSense generate a document summary at ingest "
|
|
||||||
f"(default for this benchmark: "
|
|
||||||
f"{'on' if defaults.should_summarize else 'off'})."
|
|
||||||
),
|
|
||||||
off_help="Skip per-document summary generation.",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -292,7 +274,6 @@ def format_ingest_settings_md(settings: Any) -> str:
|
||||||
return "- SurfSense ingest settings: (not recorded — re-ingest to capture)"
|
return "- SurfSense ingest settings: (not recorded — re-ingest to capture)"
|
||||||
vision = "on" if settings.get("use_vision_llm") else "off"
|
vision = "on" if settings.get("use_vision_llm") else "off"
|
||||||
mode = settings.get("processing_mode") or "basic"
|
mode = settings.get("processing_mode") or "basic"
|
||||||
summarize = "on" if settings.get("should_summarize") else "off"
|
|
||||||
return (
|
return (
|
||||||
f"- SurfSense ingest settings: vision_llm=`{vision}`, "
|
f"- SurfSense ingest settings: vision_llm=`{vision}`, "
|
||||||
f"processing_mode=`{mode}`, summarize=`{summarize}`"
|
f"processing_mode=`{mode}`"
|
||||||
|
|
|
||||||
|
|
@ -160,8 +160,7 @@ async def run_ingest(
|
||||||
upload_result = await docs_client.upload(
|
upload_result = await docs_client.upload(
|
||||||
files=[b.path for b in batches],
|
files=[b.path for b in batches],
|
||||||
search_space_id=ctx.search_space_id,
|
search_space_id=ctx.search_space_id,
|
||||||
should_summarize=settings.should_summarize,
|
use_vision_llm=settings.use_vision_llm,
|
||||||
use_vision_llm=settings.use_vision_llm,
|
|
||||||
processing_mode=settings.processing_mode,
|
processing_mode=settings.processing_mode,
|
||||||
)
|
)
|
||||||
new_doc_ids = list(upload_result.document_ids)
|
new_doc_ids = list(upload_result.document_ids)
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,6 @@ _DESCRIPTION = "CUREv1 retrieval (single-arm SurfSense): Recall@k / MRR / nDCG@1
|
||||||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||||
use_vision_llm=False,
|
use_vision_llm=False,
|
||||||
processing_mode="basic",
|
processing_mode="basic",
|
||||||
should_summarize=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -208,7 +208,6 @@ async def _upload_pdfs(
|
||||||
result = await docs_client.upload(
|
result = await docs_client.upload(
|
||||||
files=batch,
|
files=batch,
|
||||||
search_space_id=ctx.search_space_id,
|
search_space_id=ctx.search_space_id,
|
||||||
should_summarize=settings.should_summarize,
|
|
||||||
use_vision_llm=settings.use_vision_llm,
|
use_vision_llm=settings.use_vision_llm,
|
||||||
processing_mode=settings.processing_mode,
|
processing_mode=settings.processing_mode,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -169,7 +169,6 @@ _DESCRIPTION = (
|
||||||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||||
use_vision_llm=True,
|
use_vision_llm=True,
|
||||||
processing_mode="basic",
|
processing_mode="basic",
|
||||||
should_summarize=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -480,7 +480,6 @@ async def run_ingest(
|
||||||
upload_result = await docs_client.upload(
|
upload_result = await docs_client.upload(
|
||||||
files=[b.path for b in batches],
|
files=[b.path for b in batches],
|
||||||
search_space_id=ctx.search_space_id,
|
search_space_id=ctx.search_space_id,
|
||||||
should_summarize=settings.should_summarize,
|
|
||||||
use_vision_llm=settings.use_vision_llm,
|
use_vision_llm=settings.use_vision_llm,
|
||||||
processing_mode=settings.processing_mode,
|
processing_mode=settings.processing_mode,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,6 @@ _DESCRIPTION = "MIRAGE (7,663 medical MCQs) — single-arm SurfSense per-task ac
|
||||||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||||
use_vision_llm=False,
|
use_vision_llm=False,
|
||||||
processing_mode="basic",
|
processing_mode="basic",
|
||||||
should_summarize=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -225,7 +225,6 @@ async def _upload_pdfs(
|
||||||
result = await docs_client.upload(
|
result = await docs_client.upload(
|
||||||
files=batch,
|
files=batch,
|
||||||
search_space_id=ctx.search_space_id,
|
search_space_id=ctx.search_space_id,
|
||||||
should_summarize=settings.should_summarize,
|
|
||||||
use_vision_llm=settings.use_vision_llm,
|
use_vision_llm=settings.use_vision_llm,
|
||||||
processing_mode=settings.processing_mode,
|
processing_mode=settings.processing_mode,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -178,7 +178,6 @@ _TEXT_ONLY_HINTS = ("gpt-5.4-mini", "gpt-3.5", "text-only", "instruct-")
|
||||||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||||
use_vision_llm=True,
|
use_vision_llm=True,
|
||||||
processing_mode="basic",
|
processing_mode="basic",
|
||||||
should_summarize=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -189,7 +189,6 @@ async def _upload_pages(
|
||||||
result = await docs_client.upload(
|
result = await docs_client.upload(
|
||||||
files=batch,
|
files=batch,
|
||||||
search_space_id=ctx.search_space_id,
|
search_space_id=ctx.search_space_id,
|
||||||
should_summarize=settings.should_summarize,
|
|
||||||
use_vision_llm=settings.use_vision_llm,
|
use_vision_llm=settings.use_vision_llm,
|
||||||
processing_mode=settings.processing_mode,
|
processing_mode=settings.processing_mode,
|
||||||
)
|
)
|
||||||
|
|
@ -306,8 +305,7 @@ async def run_ingest(
|
||||||
settings = settings or IngestSettings(
|
settings = settings or IngestSettings(
|
||||||
use_vision_llm=False,
|
use_vision_llm=False,
|
||||||
processing_mode="basic",
|
processing_mode="basic",
|
||||||
should_summarize=False,
|
)
|
||||||
)
|
|
||||||
bench_dir = ctx.benchmark_data_dir()
|
bench_dir = ctx.benchmark_data_dir()
|
||||||
pages_dir = bench_dir / "pages"
|
pages_dir = bench_dir / "pages"
|
||||||
raw_cache = bench_dir / ".raw_cache"
|
raw_cache = bench_dir / ".raw_cache"
|
||||||
|
|
|
||||||
|
|
@ -177,7 +177,6 @@ _DESCRIPTION = (
|
||||||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||||
use_vision_llm=False,
|
use_vision_llm=False,
|
||||||
processing_mode="basic",
|
processing_mode="basic",
|
||||||
should_summarize=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -136,7 +136,6 @@ async def _upload_markdowns(
|
||||||
result = await docs_client.upload(
|
result = await docs_client.upload(
|
||||||
files=batch,
|
files=batch,
|
||||||
search_space_id=ctx.search_space_id,
|
search_space_id=ctx.search_space_id,
|
||||||
should_summarize=settings.should_summarize,
|
|
||||||
use_vision_llm=settings.use_vision_llm,
|
use_vision_llm=settings.use_vision_llm,
|
||||||
processing_mode=settings.processing_mode,
|
processing_mode=settings.processing_mode,
|
||||||
)
|
)
|
||||||
|
|
@ -240,8 +239,7 @@ async def run_ingest(
|
||||||
settings = settings or IngestSettings(
|
settings = settings or IngestSettings(
|
||||||
use_vision_llm=False,
|
use_vision_llm=False,
|
||||||
processing_mode="basic",
|
processing_mode="basic",
|
||||||
should_summarize=False,
|
)
|
||||||
)
|
|
||||||
bench_dir = ctx.benchmark_data_dir()
|
bench_dir = ctx.benchmark_data_dir()
|
||||||
wiki_cache = bench_dir / "wiki"
|
wiki_cache = bench_dir / "wiki"
|
||||||
wiki_cache.mkdir(parents=True, exist_ok=True)
|
wiki_cache.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
|
||||||
|
|
@ -153,7 +153,6 @@ _DESCRIPTION = (
|
||||||
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
_DEFAULT_INGEST_SETTINGS = IngestSettings(
|
||||||
use_vision_llm=False,
|
use_vision_llm=False,
|
||||||
processing_mode="basic",
|
processing_mode="basic",
|
||||||
should_summarize=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -69,7 +69,7 @@ async def test_set_llm_preferences_partial_update(respx_mock, http):
|
||||||
200,
|
200,
|
||||||
json={
|
json={
|
||||||
"agent_llm_id": -10042,
|
"agent_llm_id": -10042,
|
||||||
"document_summary_llm_id": None,
|
"agent_llm_id": None,
|
||||||
"image_generation_config_id": None,
|
"image_generation_config_id": None,
|
||||||
"vision_llm_config_id": None,
|
"vision_llm_config_id": None,
|
||||||
"agent_llm": {
|
"agent_llm": {
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ from surfsense_evals.core.ingest_settings import (
|
||||||
|
|
||||||
class TestMerge:
|
class TestMerge:
|
||||||
def test_silent_operator_uses_defaults(self) -> None:
|
def test_silent_operator_uses_defaults(self) -> None:
|
||||||
defaults = IngestSettings(use_vision_llm=True, processing_mode="basic", should_summarize=True)
|
defaults = IngestSettings(use_vision_llm=True, processing_mode="basic")
|
||||||
merged = IngestSettings.merge(defaults, {})
|
merged = IngestSettings.merge(defaults, {})
|
||||||
assert merged == defaults
|
assert merged == defaults
|
||||||
|
|
||||||
|
|
@ -111,16 +111,16 @@ class TestMerge:
|
||||||
assert merged.processing_mode == "basic"
|
assert merged.processing_mode == "basic"
|
||||||
|
|
||||||
def test_to_dict_round_trips(self) -> None:
|
def test_to_dict_round_trips(self) -> None:
|
||||||
s = IngestSettings(use_vision_llm=True, processing_mode="premium", should_summarize=False)
|
s = IngestSettings(use_vision_llm=True, processing_mode="premium")
|
||||||
d = s.to_dict()
|
d = s.to_dict()
|
||||||
assert d == {
|
assert d == {
|
||||||
"use_vision_llm": True,
|
"use_vision_llm": True,
|
||||||
"processing_mode": "premium",
|
"processing_mode": "premium",
|
||||||
"should_summarize": False,
|
"use_vision_llm": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
def test_render_label_format(self) -> None:
|
def test_render_label_format(self) -> None:
|
||||||
s = IngestSettings(use_vision_llm=True, processing_mode="premium", should_summarize=True)
|
s = IngestSettings(use_vision_llm=True, processing_mode="premium")
|
||||||
assert s.render_label() == "vision=on, mode=premium, summarize=on"
|
assert s.render_label() == "vision=on, mode=premium"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -136,7 +136,7 @@ class TestAddArgs:
|
||||||
add_ingest_settings_args(
|
add_ingest_settings_args(
|
||||||
p,
|
p,
|
||||||
defaults=IngestSettings(
|
defaults=IngestSettings(
|
||||||
use_vision_llm=False, processing_mode="basic", should_summarize=False
|
use_vision_llm=False, processing_mode="basic"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
return p
|
return p
|
||||||
|
|
@ -145,7 +145,7 @@ class TestAddArgs:
|
||||||
args = parser.parse_args([])
|
args = parser.parse_args([])
|
||||||
assert args.use_vision_llm is None
|
assert args.use_vision_llm is None
|
||||||
assert args.processing_mode is None
|
assert args.processing_mode is None
|
||||||
assert args.should_summarize is None
|
assert args.use_vision_llm is None
|
||||||
|
|
||||||
def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None:
|
def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None:
|
||||||
args = parser.parse_args(["--use-vision-llm"])
|
args = parser.parse_args(["--use-vision-llm"])
|
||||||
|
|
@ -168,9 +168,9 @@ class TestAddArgs:
|
||||||
|
|
||||||
def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None:
|
def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None:
|
||||||
on = parser.parse_args(["--should-summarize"])
|
on = parser.parse_args(["--should-summarize"])
|
||||||
assert on.should_summarize is True
|
assert on.use_vision_llm is True
|
||||||
off = parser.parse_args(["--no-summarize"])
|
off = parser.parse_args(["--no-summarize"])
|
||||||
assert off.should_summarize is False
|
assert off.use_vision_llm is False
|
||||||
|
|
||||||
def test_vision_flags_mutually_exclusive(
|
def test_vision_flags_mutually_exclusive(
|
||||||
self, parser: argparse.ArgumentParser
|
self, parser: argparse.ArgumentParser
|
||||||
|
|
@ -185,11 +185,11 @@ class TestAddArgs:
|
||||||
["--use-vision-llm", "--processing-mode", "premium"]
|
["--use-vision-llm", "--processing-mode", "premium"]
|
||||||
)
|
)
|
||||||
defaults = IngestSettings(
|
defaults = IngestSettings(
|
||||||
use_vision_llm=False, processing_mode="basic", should_summarize=False
|
use_vision_llm=False, processing_mode="basic"
|
||||||
)
|
)
|
||||||
merged = IngestSettings.merge(defaults, vars(args))
|
merged = IngestSettings.merge(defaults, vars(args))
|
||||||
assert merged == IngestSettings(
|
assert merged == IngestSettings(
|
||||||
use_vision_llm=True, processing_mode="premium", should_summarize=False
|
use_vision_llm=True, processing_mode="premium"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -249,7 +249,7 @@ class TestHeader:
|
||||||
class TestFormatMd:
|
class TestFormatMd:
|
||||||
def test_full_settings(self) -> None:
|
def test_full_settings(self) -> None:
|
||||||
out = format_ingest_settings_md(
|
out = format_ingest_settings_md(
|
||||||
{"use_vision_llm": True, "processing_mode": "premium", "should_summarize": True}
|
{"use_vision_llm": True, "processing_mode": "premium"}
|
||||||
)
|
)
|
||||||
assert "vision_llm=`on`" in out
|
assert "vision_llm=`on`" in out
|
||||||
assert "processing_mode=`premium`" in out
|
assert "processing_mode=`premium`" in out
|
||||||
|
|
@ -257,7 +257,7 @@ class TestFormatMd:
|
||||||
|
|
||||||
def test_default_off(self) -> None:
|
def test_default_off(self) -> None:
|
||||||
out = format_ingest_settings_md(
|
out = format_ingest_settings_md(
|
||||||
{"use_vision_llm": False, "processing_mode": "basic", "should_summarize": False}
|
{"use_vision_llm": False, "processing_mode": "basic"}
|
||||||
)
|
)
|
||||||
assert "vision_llm=`off`" in out
|
assert "vision_llm=`off`" in out
|
||||||
assert "processing_mode=`basic`" in out
|
assert "processing_mode=`basic`" in out
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue