From e588782a9bedcf7a9a2adaedeec1f9d3e4d654c9 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 4 Jun 2026 01:51:21 +0530 Subject: [PATCH] refactor(tests): Update tests to remove summary references and adjust for embedding errors --- .../tests/integration/conftest.py | 24 +++++++++---------- .../adapters/test_file_upload_adapter.py | 22 +++++++++-------- .../indexing_pipeline/test_index_document.py | 14 ++++------- .../test_local_folder_pipeline.py | 4 ++-- .../test_prepare_for_indexing.py | 6 ++--- .../test_confluence_parallel.py | 12 ---------- .../test_google_drive_parallel.py | 12 ---------- .../test_linear_parallel.py | 14 ----------- .../test_notion_parallel.py | 13 ---------- .../connector_indexers/test_page_limits.py | 8 ------- .../tests/unit/gateway/test_webhook_routes.py | 17 +++++++++---- .../indexing_pipeline/test_index_batch.py | 10 ++++---- .../test_index_batch_parallel.py | 20 ++++------------ surfsense_evals/README.md | 5 ++-- .../surfsense_evals/core/ingest_settings.py | 16 ++++++------- .../tests/core/test_ingest_settings.py | 18 ++++---------- .../settings/agent-model-manager.tsx | 2 +- 17 files changed, 69 insertions(+), 148 deletions(-) diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py index 9b8384303..19f8e3d0a 100644 --- a/surfsense_backend/tests/integration/conftest.py +++ b/surfsense_backend/tests/integration/conftest.py @@ -1,7 +1,7 @@ import importlib import sys import uuid -from unittest.mock import AsyncMock, MagicMock +from unittest.mock import MagicMock import pytest import pytest_asyncio @@ -123,18 +123,6 @@ async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpac return space -@pytest.fixture -def patched_summarize(monkeypatch) -> AsyncMock: - mock = AsyncMock(return_value="Mocked summary.") - return mock - - -@pytest.fixture -def patched_summarize_raises(monkeypatch) -> AsyncMock: - mock = AsyncMock(side_effect=RuntimeError("LLM unavailable")) - return mock - - @pytest.fixture def patched_embed_texts(monkeypatch) -> MagicMock: mock = MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]) @@ -145,6 +133,16 @@ def patched_embed_texts(monkeypatch) -> MagicMock: return mock +@pytest.fixture +def patched_embed_texts_raises(monkeypatch) -> MagicMock: + mock = MagicMock(side_effect=RuntimeError("Embedding unavailable")) + monkeypatch.setattr( + "app.indexing_pipeline.indexing_pipeline_service.embed_texts", + mock, + ) + return mock + + @pytest.fixture def patched_chunk_text(monkeypatch) -> MagicMock: mock = MagicMock(return_value=["Test chunk content."]) diff --git a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py index 3f4c88a59..b3bb241a3 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py @@ -32,8 +32,8 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker): @pytest.mark.usefixtures( "patched_embed_texts", "patched_chunk_text" ) -async def test_content_is_summary(db_session, db_search_space, db_user, mocker): - """Document content is set to the LLM-generated summary.""" +async def test_content_is_source_markdown(db_session, db_search_space, db_user, mocker): + """Document content is set to the extracted source markdown.""" adapter = UploadDocumentAdapter(db_session) await adapter.index( markdown_content="## Hello\n\nSome content.", @@ -48,7 +48,7 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker): ) document = result.scalars().first() - assert document.content == "Mocked summary." + assert document.content == "## Hello\n\nSome content." @pytest.mark.usefixtures( @@ -79,9 +79,7 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker assert chunks[0].content == "Test chunk content." -@pytest.mark.usefixtures( - "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text" -) +@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text") async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker): """RuntimeError is raised when the indexing step fails so the caller can fire a failure notification.""" adapter = UploadDocumentAdapter(db_session) @@ -92,7 +90,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, etl_service="UNSTRUCTURED", search_space_id=db_search_space.id, user_id=str(db_user.id), - ) + ) # --------------------------------------------------------------------------- @@ -104,7 +102,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, "patched_embed_texts", "patched_chunk_text" ) async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker): - """Document content is updated to the new summary after reindexing.""" + """Document content is updated to the new source markdown after reindexing.""" adapter = UploadDocumentAdapter(db_session) await adapter.index( markdown_content="## Original\n\nOriginal content.", @@ -125,7 +123,7 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc await adapter.reindex(document=document) await db_session.refresh(document) - assert document.content == "Mocked summary." + assert document.content == "## Edited\n\nNew content after user edit." @pytest.mark.usefixtures( @@ -256,7 +254,9 @@ async def test_reindex_clears_reindexing_flag( @pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text") -async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, mocker): +async def test_reindex_raises_on_failure( + db_session, db_search_space, db_user, patched_embed_texts, mocker +): """RuntimeError is raised when reindexing fails so the caller can handle it.""" adapter = UploadDocumentAdapter(db_session) @@ -276,6 +276,8 @@ async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, m document.source_markdown = "## Edited\n\nNew content after user edit." await db_session.flush() + patched_embed_texts.side_effect = RuntimeError("Embedding unavailable") + with pytest.raises(RuntimeError, match=r"Embedding failed|Reindexing failed"): await adapter.reindex(document=document) diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py index ff0578720..ee895c61b 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py @@ -259,16 +259,14 @@ async def test_reindex_replaces_old_chunks( assert len(chunks) == 1 -@pytest.mark.usefixtures( - "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text" -) -async def test_llm_error_sets_status_failed( +@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text") +async def test_embedding_error_sets_status_failed( db_session, db_search_space, make_connector_document, mocker, ): - """Document status is FAILED when the LLM raises during indexing.""" + """Document status is FAILED when embedding raises during indexing.""" connector_doc = make_connector_document(search_space_id=db_search_space.id) service = IndexingPipelineService(session=db_session) @@ -286,10 +284,8 @@ async def test_llm_error_sets_status_failed( assert DocumentStatus.is_state(reloaded.status, DocumentStatus.FAILED) -@pytest.mark.usefixtures( - "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text" -) -async def test_llm_error_leaves_no_partial_data( +@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text") +async def test_embedding_error_leaves_no_partial_data( db_session, db_search_space, make_connector_document, diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py index 4070daa80..2cd378343 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py @@ -1271,7 +1271,7 @@ class TestIndexingProgressFlag: original_index = IndexingPipelineService.index flag_observed = [] - async def patched_index(self_pipe, document, connector_doc, llm): + async def patched_index(self_pipe, document, connector_doc): folder = ( await db_session.execute( select(Folder).where( @@ -1283,7 +1283,7 @@ class TestIndexingProgressFlag: if folder: meta = folder.folder_metadata or {} flag_observed.append(meta.get("indexing_in_progress", False)) - return await original_index(self_pipe, document, connector_doc, llm) + return await original_index(self_pipe, document, connector_doc) IndexingPipelineService.index = patched_index try: diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py index 9c8a3203b..d0b8c7fed 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py @@ -338,9 +338,7 @@ async def test_same_content_from_different_source_is_skipped( assert len(result.scalars().all()) == 1 -@pytest.mark.usefixtures( - "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text" -) +@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text") async def test_failed_document_with_unchanged_content_is_requeued( db_session, db_search_space, @@ -351,7 +349,7 @@ async def test_failed_document_with_unchanged_content_is_requeued( doc = make_connector_document(search_space_id=db_search_space.id) service = IndexingPipelineService(session=db_session) - # First run: document is created and indexing crashes → status = failed + # First run: document is created and indexing crashes, so status becomes failed. prepared = await service.prepare_for_indexing([doc]) document_id = prepared[0].id await service.index(prepared[0], doc) diff --git a/surfsense_backend/tests/unit/connector_indexers/test_confluence_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_confluence_parallel.py index daf6ab985..ff85096d4 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_confluence_parallel.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_confluence_parallel.py @@ -87,18 +87,6 @@ async def test_build_connector_doc_produces_correct_fields(): assert doc.metadata["connector_id"] == _CONNECTOR_ID assert doc.metadata["document_type"] == "Confluence Page" assert doc.metadata["connector_type"] == "Confluence" - assert "Engineering Handbook" in doc.deterministic_preview - assert markdown in doc.deterministic_preview - - -async def test_build_connector_doc_summary_disabled(): - doc = _build_connector_doc( - _make_page(), - _to_markdown(_make_page()), - connector_id=_CONNECTOR_ID, - search_space_id=_SEARCH_SPACE_ID, - user_id=_USER_ID, - ) # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py index 4e67236c3..65be05593 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py @@ -294,12 +294,6 @@ def full_scan_mocks(mock_drive_client, monkeypatch): MagicMock(return_value=pipeline_mock), ) - monkeypatch.setattr( - _mod, - "get_agent_llm", - AsyncMock(return_value=MagicMock()), - ) - return { "drive_client": mock_drive_client, "session": mock_session, @@ -480,12 +474,6 @@ async def test_delta_sync_removals_serial_rest_parallel(monkeypatch): "IndexingPipelineService", MagicMock(return_value=pipeline_mock), ) - monkeypatch.setattr( - _mod, - "get_agent_llm", - AsyncMock(return_value=MagicMock()), - ) - mock_session, _ = _make_page_limit_session() mock_task_logger = MagicMock() mock_task_logger.log_task_progress = AsyncMock() diff --git a/surfsense_backend/tests/unit/connector_indexers/test_linear_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_linear_parallel.py index a4702a5ff..f057a6352 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_linear_parallel.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_linear_parallel.py @@ -88,20 +88,6 @@ async def test_build_connector_doc_produces_correct_fields(): assert doc.metadata["connector_id"] == _CONNECTOR_ID assert doc.metadata["document_type"] == "Linear Issue" assert doc.metadata["connector_type"] == "Linear" - assert "ENG-42" in doc.deterministic_preview - assert markdown in doc.deterministic_preview - - -async def test_build_connector_doc_summary_disabled(): - """When enable_vision_llm is False, deterministic_content is False.""" - doc = _build_connector_doc( - _make_issue(), - _make_formatted_issue(), - "# content", - connector_id=_CONNECTOR_ID, - search_space_id=_SEARCH_SPACE_ID, - user_id=_USER_ID, - ) # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/unit/connector_indexers/test_notion_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_notion_parallel.py index 0ad1f2178..e40f739d8 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_notion_parallel.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_notion_parallel.py @@ -55,19 +55,6 @@ async def test_build_connector_doc_produces_correct_fields(): assert doc.metadata["connector_id"] == _CONNECTOR_ID assert doc.metadata["document_type"] == "Notion Page" assert doc.metadata["connector_type"] == "Notion" - assert "My Notion Page" in doc.deterministic_preview - assert markdown in doc.deterministic_preview - - -async def test_build_connector_doc_summary_disabled(): - """When enable_vision_llm is False, deterministic_content is False.""" - doc = _build_connector_doc( - _make_page(), - "# content", - connector_id=_CONNECTOR_ID, - search_space_id=_SEARCH_SPACE_ID, - user_id=_USER_ID, - ) # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py b/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py index 0080b639e..a79ed7858 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py @@ -335,10 +335,6 @@ def gdrive_full_scan_mocks(monkeypatch): monkeypatch.setattr( _mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock) ) - monkeypatch.setattr( - _mod, "get_agent_llm", AsyncMock(return_value=MagicMock()) - ) - return { "mod": _mod, "session": session, @@ -452,10 +448,6 @@ async def test_gdrive_delta_sync_skips_over_quota(monkeypatch): monkeypatch.setattr( _mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock) ) - monkeypatch.setattr( - _mod, "get_agent_llm", AsyncMock(return_value=MagicMock()) - ) - mock_task_logger = MagicMock() mock_task_logger.log_task_progress = AsyncMock() diff --git a/surfsense_backend/tests/unit/gateway/test_webhook_routes.py b/surfsense_backend/tests/unit/gateway/test_webhook_routes.py index 34d0651ab..338a35c39 100644 --- a/surfsense_backend/tests/unit/gateway/test_webhook_routes.py +++ b/surfsense_backend/tests/unit/gateway/test_webhook_routes.py @@ -69,6 +69,13 @@ def _signed_slack_request(payload: dict, *, secret: str = "signing-secret") -> R ) +def _enable_slack_gateway(monkeypatch): + monkeypatch.setattr(routes.config, "GATEWAY_SLACK_ENABLED", True) + monkeypatch.setattr(routes.config, "GATEWAY_SLACK_CLIENT_ID", "client-id") + monkeypatch.setattr(routes.config, "GATEWAY_SLACK_CLIENT_SECRET", "client-secret") + monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret") + + async def _call_webhook(*, request: RequestStub, account_id: int, session): return await routes.telegram_webhook( request=request, @@ -207,7 +214,7 @@ def test_verify_slack_signature_accepts_valid_signature(): @pytest.mark.asyncio async def test_slack_webhook_url_verification(monkeypatch, mocker): - monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret") + _enable_slack_gateway(monkeypatch) request = _signed_slack_request({"type": "url_verification", "challenge": "abc123"}) response = await routes.slack_webhook(request=request, session=mocker.AsyncMock()) @@ -218,7 +225,7 @@ async def test_slack_webhook_url_verification(monkeypatch, mocker): @pytest.mark.asyncio async def test_slack_webhook_persists_event(monkeypatch, mocker): - monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret") + _enable_slack_gateway(monkeypatch) session = mocker.AsyncMock() monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account())) persist = mocker.AsyncMock(return_value=100) @@ -248,7 +255,7 @@ async def test_slack_webhook_persists_event(monkeypatch, mocker): @pytest.mark.asyncio async def test_slack_webhook_ignores_self_event(monkeypatch, mocker): - monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret") + _enable_slack_gateway(monkeypatch) session = mocker.AsyncMock() monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account())) persist = mocker.AsyncMock(return_value=100) @@ -275,7 +282,7 @@ async def test_slack_webhook_ignores_self_event(monkeypatch, mocker): @pytest.mark.asyncio -async def test_discord_gateway_install_returns_oauth_url(monkeypatch): +async def test_discord_gateway_install_returns_oauth_url(monkeypatch, mocker): monkeypatch.setattr(routes.config, "DISCORD_CLIENT_ID", "discord-client") monkeypatch.setattr( routes.config, @@ -283,10 +290,12 @@ async def test_discord_gateway_install_returns_oauth_url(monkeypatch): "http://localhost:8000/api/v1/gateway/discord/callback", ) monkeypatch.setattr(routes.config, "SECRET_KEY", "test-secret") + monkeypatch.setattr(routes, "check_search_space_access", mocker.AsyncMock()) response = await routes.install_discord_gateway( search_space_id=123, user=SimpleNamespace(id="00000000-0000-0000-0000-000000000001"), + session=mocker.AsyncMock(), ) assert response["auth_url"].startswith("https://discord.com/api/oauth2/authorize?") diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py index dd9940503..963ac6792 100644 --- a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py @@ -37,12 +37,10 @@ async def test_calls_prepare_then_index_per_document(pipeline, make_connector_do orm2 = MagicMock(spec=Document) orm2.unique_identifier_hash = compute_unique_identifier_hash(doc2) - mock_llm = MagicMock() - pipeline.prepare_for_indexing = AsyncMock(return_value=[orm1, orm2]) - pipeline.index = AsyncMock(side_effect=lambda doc, cdoc, llm: doc) + pipeline.index = AsyncMock(side_effect=lambda doc, cdoc: doc) - results = await pipeline.index_batch([doc1, doc2], mock_llm) + results = await pipeline.index_batch([doc1, doc2]) pipeline.prepare_for_indexing.assert_awaited_once_with([doc1, doc2]) assert pipeline.index.await_count == 2 @@ -53,7 +51,7 @@ async def test_empty_input_returns_empty(pipeline): """Empty connector_docs list returns empty result.""" pipeline.prepare_for_indexing = AsyncMock(return_value=[]) - results = await pipeline.index_batch([], MagicMock()) + results = await pipeline.index_batch([]) assert results == [] @@ -74,7 +72,7 @@ async def test_skips_document_without_matching_connector_doc( pipeline.prepare_for_indexing = AsyncMock(return_value=[orphan_orm]) pipeline.index = AsyncMock() - results = await pipeline.index_batch([doc1], MagicMock()) + results = await pipeline.index_batch([doc1]) pipeline.index.assert_not_awaited() assert results == [] diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py index e4ba8f44c..3a1b77d90 100644 --- a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py @@ -183,19 +183,14 @@ async def test_batch_parallel_indexes_all_documents( index_calls = [] - async def fake_index(self, document, connector_doc, llm): + async def fake_index(self, document, connector_doc): index_calls.append(document.id) document.status = DocumentStatus.ready() return document monkeypatch.setattr(IndexingPipelineService, "index", fake_index) - async def mock_get_llm(session): - return MagicMock() - - _, indexed, failed = await pipeline.index_batch_parallel( - docs, mock_get_llm, max_concurrency=2 - ) + _, indexed, failed = await pipeline.index_batch_parallel(docs, max_concurrency=2) assert indexed == 3 assert failed == 0 @@ -224,20 +219,15 @@ async def test_batch_parallel_one_failure_does_not_affect_others( _mock_session_factory(orm_by_id), ) - async def failing_index(self, document, connector_doc, llm): + async def failing_index(self, document, connector_doc): if document.id == 2: - raise RuntimeError("LLM exploded") + raise RuntimeError("Indexing exploded") document.status = DocumentStatus.ready() return document monkeypatch.setattr(IndexingPipelineService, "index", failing_index) - async def mock_get_llm(session): - return MagicMock() - - _, indexed, failed = await pipeline.index_batch_parallel( - docs, mock_get_llm, max_concurrency=4 - ) + _, indexed, failed = await pipeline.index_batch_parallel(docs, max_concurrency=4) assert indexed == 2 assert failed == 1 diff --git a/surfsense_evals/README.md b/surfsense_evals/README.md index c6314af80..c755c4de6 100644 --- a/surfsense_evals/README.md +++ b/surfsense_evals/README.md @@ -137,15 +137,14 @@ Notes: - `--skip-unanswerable` (run) — drop unanswerable questions - `--docs ,` (run) — scope to specific docs -## Ingestion knobs (vision LLM, processing mode, summarize) +## Ingestion knobs (vision LLM, processing mode) -The harness exposes `POST /api/v1/documents/fileupload`'s three knobs on every `ingest` subcommand: +The harness exposes `POST /api/v1/documents/fileupload`'s ingest knobs on every `ingest` subcommand: | Flag pair | Effect | |--------------------------------------------|-----------------------------------------------------------------------------------------| | `--use-vision-llm` / `--no-vision-llm` | Walk every embedded image in the PDF and inline image-derived text at the image's position (see below). | | `--processing-mode {basic,premium}` | `premium` carries a 10× page multiplier and routes to a stronger ETL (e.g. LlamaCloud). | -| `--should-summarize` / `--no-summarize` | Generate a per-document summary at ingest. | The "Default ingest" column in the benchmarks table is what runs if you don't pass any flag. Whatever was actually used is recorded as a `__settings__` header in the doc map (`data//maps/_*_map.jsonl`) and as `extra.ingest_settings` in `run_artifact.json`, then surfaced in the report — no need to hunt through CLI history. diff --git a/surfsense_evals/src/surfsense_evals/core/ingest_settings.py b/surfsense_evals/src/surfsense_evals/core/ingest_settings.py index 6c27abcd5..8328e0d46 100644 --- a/surfsense_evals/src/surfsense_evals/core/ingest_settings.py +++ b/surfsense_evals/src/surfsense_evals/core/ingest_settings.py @@ -173,14 +173,14 @@ def add_ingest_settings_args( *, defaults: IngestSettings, ) -> None: - """Attach the three ingest-settings flag pairs to ``parser``. + """Attach ingest-settings flags to ``parser``. - Each bool exposes a mutually exclusive ``--foo`` / ``--no-foo`` - pair so an operator can flip either direction without restating - every flag. Default is ``None`` so that "operator didn't pass the - flag" is distinguishable from "operator explicitly passed false" - — ``IngestSettings.merge`` then folds in the benchmark default - only when the operator was silent. + The vision bool exposes a mutually exclusive ``--foo`` / ``--no-foo`` + pair so an operator can flip either direction without restating every + flag. Default is ``None`` so that "operator didn't pass the flag" is + distinguishable from "operator explicitly passed false" — + ``IngestSettings.merge`` then folds in the benchmark default only when + the operator was silent. """ settings_group = parser.add_argument_group( @@ -276,7 +276,7 @@ def format_ingest_settings_md(settings: Any) -> str: mode = settings.get("processing_mode") or "basic" return ( f"- SurfSense ingest settings: vision_llm=`{vision}`, " - f"processing_mode=`{mode}`, summarize=`{summarize}`" + f"processing_mode=`{mode}`" ) diff --git a/surfsense_evals/tests/core/test_ingest_settings.py b/surfsense_evals/tests/core/test_ingest_settings.py index afbfc709d..fd7e7818a 100644 --- a/surfsense_evals/tests/core/test_ingest_settings.py +++ b/surfsense_evals/tests/core/test_ingest_settings.py @@ -4,7 +4,7 @@ Covers: * ``IngestSettings.merge`` honours operator overrides and falls back to per-benchmark defaults when the operator is silent. -* ``add_ingest_settings_args`` exposes the three flag pairs and +* ``add_ingest_settings_args`` exposes ingest settings flags and argparse defaults of ``None`` correctly distinguish "not passed" from "explicitly false". * ``settings_header_line`` / ``read_settings_header`` round-trip @@ -116,12 +116,11 @@ class TestMerge: assert d == { "use_vision_llm": True, "processing_mode": "premium", - "use_vision_llm": False, } def test_render_label_format(self) -> None: s = IngestSettings(use_vision_llm=True, processing_mode="premium") - assert s.render_label() == "vision=on, mode=premium, summarize=on" + assert s.render_label() == "vision=on, mode=premium" # --------------------------------------------------------------------------- @@ -145,7 +144,6 @@ class TestAddArgs: args = parser.parse_args([]) assert args.use_vision_llm is None assert args.processing_mode is None - assert args.use_vision_llm is None def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None: args = parser.parse_args(["--use-vision-llm"]) @@ -166,12 +164,6 @@ class TestAddArgs: with pytest.raises(SystemExit): parser.parse_args(["--processing-mode", "exotic"]) - def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None: - on = parser.parse_args(["--should-summarize"]) - assert on.use_vision_llm is True - off = parser.parse_args(["--no-summarize"]) - assert off.use_vision_llm is False - def test_vision_flags_mutually_exclusive( self, parser: argparse.ArgumentParser ) -> None: @@ -249,19 +241,17 @@ class TestHeader: class TestFormatMd: def test_full_settings(self) -> None: out = format_ingest_settings_md( - {"use_vision_llm": True, "processing_mode": "premium", "use_vision_llm": True} + {"use_vision_llm": True, "processing_mode": "premium"} ) assert "vision_llm=`on`" in out assert "processing_mode=`premium`" in out - assert "summarize=`on`" in out def test_default_off(self) -> None: out = format_ingest_settings_md( - {"use_vision_llm": False, "processing_mode": "basic", "use_vision_llm": False} + {"use_vision_llm": False, "processing_mode": "basic"} ) assert "vision_llm=`off`" in out assert "processing_mode=`basic`" in out - assert "summarize=`off`" in out def test_missing_returns_re_ingest_hint(self) -> None: # Empty dict + None + non-mapping should all degrade gracefully. diff --git a/surfsense_web/components/settings/agent-model-manager.tsx b/surfsense_web/components/settings/agent-model-manager.tsx index b0e13d3d7..507a263e0 100644 --- a/surfsense_web/components/settings/agent-model-manager.tsx +++ b/surfsense_web/components/settings/agent-model-manager.tsx @@ -228,7 +228,7 @@ export function AgentModelManager({ searchSpaceId }: AgentModelManagerProps) {

No Models Yet

{canCreate - ? "Add your first model to power document summarization, chat, and other agent capabilities" + ? "Add your first model to power chat, reports, and other agent capabilities" : "No models have been added to this space yet. Contact a space owner to add one"}