From e588782a9bedcf7a9a2adaedeec1f9d3e4d654c9 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Thu, 4 Jun 2026 01:51:21 +0530
Subject: [PATCH] refactor(tests): Update tests to remove summary references
 and adjust for embedding errors

---
 .../tests/integration/conftest.py             | 24 +++++++++----------
 .../adapters/test_file_upload_adapter.py      | 22 +++++++++--------
 .../indexing_pipeline/test_index_document.py  | 14 ++++-------
 .../test_local_folder_pipeline.py             |  4 ++--
 .../test_prepare_for_indexing.py              |  6 ++---
 .../test_confluence_parallel.py               | 12 ----------
 .../test_google_drive_parallel.py             | 12 ----------
 .../test_linear_parallel.py                   | 14 -----------
 .../test_notion_parallel.py                   | 13 ----------
 .../connector_indexers/test_page_limits.py    |  8 -------
 .../tests/unit/gateway/test_webhook_routes.py | 17 +++++++++----
 .../indexing_pipeline/test_index_batch.py     | 10 ++++----
 .../test_index_batch_parallel.py              | 20 ++++------------
 surfsense_evals/README.md                     |  5 ++--
 .../surfsense_evals/core/ingest_settings.py   | 16 ++++++-------
 .../tests/core/test_ingest_settings.py        | 18 ++++----------
 .../settings/agent-model-manager.tsx          |  2 +-
 17 files changed, 69 insertions(+), 148 deletions(-)

diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py
index 9b8384303..19f8e3d0a 100644
--- a/surfsense_backend/tests/integration/conftest.py
+++ b/surfsense_backend/tests/integration/conftest.py
@@ -1,7 +1,7 @@
 import importlib
 import sys
 import uuid
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import MagicMock
 
 import pytest
 import pytest_asyncio
@@ -123,18 +123,6 @@ async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpac
     return space
 
 
-@pytest.fixture
-def patched_summarize(monkeypatch) -> AsyncMock:
-    mock = AsyncMock(return_value="Mocked summary.")
-    return mock
-
-
-@pytest.fixture
-def patched_summarize_raises(monkeypatch) -> AsyncMock:
-    mock = AsyncMock(side_effect=RuntimeError("LLM unavailable"))
-    return mock
-
-
 @pytest.fixture
 def patched_embed_texts(monkeypatch) -> MagicMock:
     mock = MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts])
@@ -145,6 +133,16 @@ def patched_embed_texts(monkeypatch) -> MagicMock:
     return mock
 
 
+@pytest.fixture
+def patched_embed_texts_raises(monkeypatch) -> MagicMock:
+    mock = MagicMock(side_effect=RuntimeError("Embedding unavailable"))
+    monkeypatch.setattr(
+        "app.indexing_pipeline.indexing_pipeline_service.embed_texts",
+        mock,
+    )
+    return mock
+
+
 @pytest.fixture
 def patched_chunk_text(monkeypatch) -> MagicMock:
     mock = MagicMock(return_value=["Test chunk content."])
diff --git a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
index 3f4c88a59..b3bb241a3 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
@@ -32,8 +32,8 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
 @pytest.mark.usefixtures(
 "patched_embed_texts", "patched_chunk_text"
 )
-async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
-    """Document content is set to the LLM-generated summary."""
+async def test_content_is_source_markdown(db_session, db_search_space, db_user, mocker):
+    """Document content is set to the extracted source markdown."""
     adapter = UploadDocumentAdapter(db_session)
     await adapter.index(
         markdown_content="## Hello\n\nSome content.",
@@ -48,7 +48,7 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
     )
     document = result.scalars().first()
 
-    assert document.content == "Mocked summary."
+    assert document.content == "## Hello\n\nSome content."
 
 
 @pytest.mark.usefixtures(
@@ -79,9 +79,7 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker
     assert chunks[0].content == "Test chunk content."
 
 
-@pytest.mark.usefixtures(
-    "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
-)
+@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
 async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker):
     """RuntimeError is raised when the indexing step fails so the caller can fire a failure notification."""
     adapter = UploadDocumentAdapter(db_session)
@@ -92,7 +90,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
             etl_service="UNSTRUCTURED",
             search_space_id=db_search_space.id,
             user_id=str(db_user.id),
-                )
+        )
 
 
 # ---------------------------------------------------------------------------
@@ -104,7 +102,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
 "patched_embed_texts", "patched_chunk_text"
 )
 async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker):
-    """Document content is updated to the new summary after reindexing."""
+    """Document content is updated to the new source markdown after reindexing."""
     adapter = UploadDocumentAdapter(db_session)
     await adapter.index(
         markdown_content="## Original\n\nOriginal content.",
@@ -125,7 +123,7 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc
     await adapter.reindex(document=document)
 
     await db_session.refresh(document)
-    assert document.content == "Mocked summary."
+    assert document.content == "## Edited\n\nNew content after user edit."
 
 
 @pytest.mark.usefixtures(
@@ -256,7 +254,9 @@ async def test_reindex_clears_reindexing_flag(
 
 
 @pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
-async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, mocker):
+async def test_reindex_raises_on_failure(
+    db_session, db_search_space, db_user, patched_embed_texts, mocker
+):
     """RuntimeError is raised when reindexing fails so the caller can handle it."""
 
     adapter = UploadDocumentAdapter(db_session)
@@ -276,6 +276,8 @@ async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, m
     document.source_markdown = "## Edited\n\nNew content after user edit."
     await db_session.flush()
 
+    patched_embed_texts.side_effect = RuntimeError("Embedding unavailable")
+
     with pytest.raises(RuntimeError, match=r"Embedding failed|Reindexing failed"):
         await adapter.reindex(document=document)
 
diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py
index ff0578720..ee895c61b 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py
@@ -259,16 +259,14 @@ async def test_reindex_replaces_old_chunks(
     assert len(chunks) == 1
 
 
-@pytest.mark.usefixtures(
-    "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
-)
-async def test_llm_error_sets_status_failed(
+@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
+async def test_embedding_error_sets_status_failed(
     db_session,
     db_search_space,
     make_connector_document,
     mocker,
 ):
-    """Document status is FAILED when the LLM raises during indexing."""
+    """Document status is FAILED when embedding raises during indexing."""
     connector_doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -286,10 +284,8 @@ async def test_llm_error_sets_status_failed(
     assert DocumentStatus.is_state(reloaded.status, DocumentStatus.FAILED)
 
 
-@pytest.mark.usefixtures(
-    "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
-)
-async def test_llm_error_leaves_no_partial_data(
+@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
+async def test_embedding_error_leaves_no_partial_data(
     db_session,
     db_search_space,
     make_connector_document,
diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py
index 4070daa80..2cd378343 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py
@@ -1271,7 +1271,7 @@ class TestIndexingProgressFlag:
         original_index = IndexingPipelineService.index
         flag_observed = []
 
-        async def patched_index(self_pipe, document, connector_doc, llm):
+        async def patched_index(self_pipe, document, connector_doc):
             folder = (
                 await db_session.execute(
                     select(Folder).where(
@@ -1283,7 +1283,7 @@ class TestIndexingProgressFlag:
             if folder:
                 meta = folder.folder_metadata or {}
                 flag_observed.append(meta.get("indexing_in_progress", False))
-            return await original_index(self_pipe, document, connector_doc, llm)
+            return await original_index(self_pipe, document, connector_doc)
 
         IndexingPipelineService.index = patched_index
         try:
diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py
index 9c8a3203b..d0b8c7fed 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py
@@ -338,9 +338,7 @@ async def test_same_content_from_different_source_is_skipped(
     assert len(result.scalars().all()) == 1
 
 
-@pytest.mark.usefixtures(
-    "patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
-)
+@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
 async def test_failed_document_with_unchanged_content_is_requeued(
     db_session,
     db_search_space,
@@ -351,7 +349,7 @@ async def test_failed_document_with_unchanged_content_is_requeued(
     doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
-    # First run: document is created and indexing crashes → status = failed
+    # First run: document is created and indexing crashes, so status becomes failed.
     prepared = await service.prepare_for_indexing([doc])
     document_id = prepared[0].id
     await service.index(prepared[0], doc)
diff --git a/surfsense_backend/tests/unit/connector_indexers/test_confluence_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_confluence_parallel.py
index daf6ab985..ff85096d4 100644
--- a/surfsense_backend/tests/unit/connector_indexers/test_confluence_parallel.py
+++ b/surfsense_backend/tests/unit/connector_indexers/test_confluence_parallel.py
@@ -87,18 +87,6 @@ async def test_build_connector_doc_produces_correct_fields():
     assert doc.metadata["connector_id"] == _CONNECTOR_ID
     assert doc.metadata["document_type"] == "Confluence Page"
     assert doc.metadata["connector_type"] == "Confluence"
-    assert "Engineering Handbook" in doc.deterministic_preview
-    assert markdown in doc.deterministic_preview
-
-
-async def test_build_connector_doc_summary_disabled():
-    doc = _build_connector_doc(
-        _make_page(),
-        _to_markdown(_make_page()),
-        connector_id=_CONNECTOR_ID,
-        search_space_id=_SEARCH_SPACE_ID,
-        user_id=_USER_ID,
-    )
 
 
 # ---------------------------------------------------------------------------
diff --git a/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py
index 4e67236c3..65be05593 100644
--- a/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py
+++ b/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py
@@ -294,12 +294,6 @@ def full_scan_mocks(mock_drive_client, monkeypatch):
         MagicMock(return_value=pipeline_mock),
     )
 
-    monkeypatch.setattr(
-        _mod,
-        "get_agent_llm",
-        AsyncMock(return_value=MagicMock()),
-    )
-
     return {
         "drive_client": mock_drive_client,
         "session": mock_session,
@@ -480,12 +474,6 @@ async def test_delta_sync_removals_serial_rest_parallel(monkeypatch):
         "IndexingPipelineService",
         MagicMock(return_value=pipeline_mock),
     )
-    monkeypatch.setattr(
-        _mod,
-        "get_agent_llm",
-        AsyncMock(return_value=MagicMock()),
-    )
-
     mock_session, _ = _make_page_limit_session()
     mock_task_logger = MagicMock()
     mock_task_logger.log_task_progress = AsyncMock()
diff --git a/surfsense_backend/tests/unit/connector_indexers/test_linear_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_linear_parallel.py
index a4702a5ff..f057a6352 100644
--- a/surfsense_backend/tests/unit/connector_indexers/test_linear_parallel.py
+++ b/surfsense_backend/tests/unit/connector_indexers/test_linear_parallel.py
@@ -88,20 +88,6 @@ async def test_build_connector_doc_produces_correct_fields():
     assert doc.metadata["connector_id"] == _CONNECTOR_ID
     assert doc.metadata["document_type"] == "Linear Issue"
     assert doc.metadata["connector_type"] == "Linear"
-    assert "ENG-42" in doc.deterministic_preview
-    assert markdown in doc.deterministic_preview
-
-
-async def test_build_connector_doc_summary_disabled():
-    """When enable_vision_llm is False, deterministic_content is False."""
-    doc = _build_connector_doc(
-        _make_issue(),
-        _make_formatted_issue(),
-        "# content",
-        connector_id=_CONNECTOR_ID,
-        search_space_id=_SEARCH_SPACE_ID,
-        user_id=_USER_ID,
-    )
 
 
 # ---------------------------------------------------------------------------
diff --git a/surfsense_backend/tests/unit/connector_indexers/test_notion_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_notion_parallel.py
index 0ad1f2178..e40f739d8 100644
--- a/surfsense_backend/tests/unit/connector_indexers/test_notion_parallel.py
+++ b/surfsense_backend/tests/unit/connector_indexers/test_notion_parallel.py
@@ -55,19 +55,6 @@ async def test_build_connector_doc_produces_correct_fields():
     assert doc.metadata["connector_id"] == _CONNECTOR_ID
     assert doc.metadata["document_type"] == "Notion Page"
     assert doc.metadata["connector_type"] == "Notion"
-    assert "My Notion Page" in doc.deterministic_preview
-    assert markdown in doc.deterministic_preview
-
-
-async def test_build_connector_doc_summary_disabled():
-    """When enable_vision_llm is False, deterministic_content is False."""
-    doc = _build_connector_doc(
-        _make_page(),
-        "# content",
-        connector_id=_CONNECTOR_ID,
-        search_space_id=_SEARCH_SPACE_ID,
-        user_id=_USER_ID,
-    )
 
 
 # ---------------------------------------------------------------------------
diff --git a/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py b/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py
index 0080b639e..a79ed7858 100644
--- a/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py
+++ b/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py
@@ -335,10 +335,6 @@ def gdrive_full_scan_mocks(monkeypatch):
     monkeypatch.setattr(
         _mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
     )
-    monkeypatch.setattr(
-        _mod, "get_agent_llm", AsyncMock(return_value=MagicMock())
-    )
-
     return {
         "mod": _mod,
         "session": session,
@@ -452,10 +448,6 @@ async def test_gdrive_delta_sync_skips_over_quota(monkeypatch):
     monkeypatch.setattr(
         _mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
     )
-    monkeypatch.setattr(
-        _mod, "get_agent_llm", AsyncMock(return_value=MagicMock())
-    )
-
     mock_task_logger = MagicMock()
     mock_task_logger.log_task_progress = AsyncMock()
 
diff --git a/surfsense_backend/tests/unit/gateway/test_webhook_routes.py b/surfsense_backend/tests/unit/gateway/test_webhook_routes.py
index 34d0651ab..338a35c39 100644
--- a/surfsense_backend/tests/unit/gateway/test_webhook_routes.py
+++ b/surfsense_backend/tests/unit/gateway/test_webhook_routes.py
@@ -69,6 +69,13 @@ def _signed_slack_request(payload: dict, *, secret: str = "signing-secret") -> R
     )
 
 
+def _enable_slack_gateway(monkeypatch):
+    monkeypatch.setattr(routes.config, "GATEWAY_SLACK_ENABLED", True)
+    monkeypatch.setattr(routes.config, "GATEWAY_SLACK_CLIENT_ID", "client-id")
+    monkeypatch.setattr(routes.config, "GATEWAY_SLACK_CLIENT_SECRET", "client-secret")
+    monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
+
+
 async def _call_webhook(*, request: RequestStub, account_id: int, session):
     return await routes.telegram_webhook(
         request=request,
@@ -207,7 +214,7 @@ def test_verify_slack_signature_accepts_valid_signature():
 
 @pytest.mark.asyncio
 async def test_slack_webhook_url_verification(monkeypatch, mocker):
-    monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
+    _enable_slack_gateway(monkeypatch)
     request = _signed_slack_request({"type": "url_verification", "challenge": "abc123"})
 
     response = await routes.slack_webhook(request=request, session=mocker.AsyncMock())
@@ -218,7 +225,7 @@ async def test_slack_webhook_url_verification(monkeypatch, mocker):
 
 @pytest.mark.asyncio
 async def test_slack_webhook_persists_event(monkeypatch, mocker):
-    monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
+    _enable_slack_gateway(monkeypatch)
     session = mocker.AsyncMock()
     monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account()))
     persist = mocker.AsyncMock(return_value=100)
@@ -248,7 +255,7 @@ async def test_slack_webhook_persists_event(monkeypatch, mocker):
 
 @pytest.mark.asyncio
 async def test_slack_webhook_ignores_self_event(monkeypatch, mocker):
-    monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
+    _enable_slack_gateway(monkeypatch)
     session = mocker.AsyncMock()
     monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account()))
     persist = mocker.AsyncMock(return_value=100)
@@ -275,7 +282,7 @@ async def test_slack_webhook_ignores_self_event(monkeypatch, mocker):
 
 
 @pytest.mark.asyncio
-async def test_discord_gateway_install_returns_oauth_url(monkeypatch):
+async def test_discord_gateway_install_returns_oauth_url(monkeypatch, mocker):
     monkeypatch.setattr(routes.config, "DISCORD_CLIENT_ID", "discord-client")
     monkeypatch.setattr(
         routes.config,
@@ -283,10 +290,12 @@ async def test_discord_gateway_install_returns_oauth_url(monkeypatch):
         "http://localhost:8000/api/v1/gateway/discord/callback",
     )
     monkeypatch.setattr(routes.config, "SECRET_KEY", "test-secret")
+    monkeypatch.setattr(routes, "check_search_space_access", mocker.AsyncMock())
 
     response = await routes.install_discord_gateway(
         search_space_id=123,
         user=SimpleNamespace(id="00000000-0000-0000-0000-000000000001"),
+        session=mocker.AsyncMock(),
     )
 
     assert response["auth_url"].startswith("https://discord.com/api/oauth2/authorize?")
diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py
index dd9940503..963ac6792 100644
--- a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py
+++ b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py
@@ -37,12 +37,10 @@ async def test_calls_prepare_then_index_per_document(pipeline, make_connector_do
     orm2 = MagicMock(spec=Document)
     orm2.unique_identifier_hash = compute_unique_identifier_hash(doc2)
 
-    mock_llm = MagicMock()
-
     pipeline.prepare_for_indexing = AsyncMock(return_value=[orm1, orm2])
-    pipeline.index = AsyncMock(side_effect=lambda doc, cdoc, llm: doc)
+    pipeline.index = AsyncMock(side_effect=lambda doc, cdoc: doc)
 
-    results = await pipeline.index_batch([doc1, doc2], mock_llm)
+    results = await pipeline.index_batch([doc1, doc2])
 
     pipeline.prepare_for_indexing.assert_awaited_once_with([doc1, doc2])
     assert pipeline.index.await_count == 2
@@ -53,7 +51,7 @@ async def test_empty_input_returns_empty(pipeline):
     """Empty connector_docs list returns empty result."""
     pipeline.prepare_for_indexing = AsyncMock(return_value=[])
 
-    results = await pipeline.index_batch([], MagicMock())
+    results = await pipeline.index_batch([])
 
     assert results == []
 
@@ -74,7 +72,7 @@ async def test_skips_document_without_matching_connector_doc(
     pipeline.prepare_for_indexing = AsyncMock(return_value=[orphan_orm])
     pipeline.index = AsyncMock()
 
-    results = await pipeline.index_batch([doc1], MagicMock())
+    results = await pipeline.index_batch([doc1])
 
     pipeline.index.assert_not_awaited()
     assert results == []
diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py
index e4ba8f44c..3a1b77d90 100644
--- a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py
+++ b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py
@@ -183,19 +183,14 @@ async def test_batch_parallel_indexes_all_documents(
 
     index_calls = []
 
-    async def fake_index(self, document, connector_doc, llm):
+    async def fake_index(self, document, connector_doc):
         index_calls.append(document.id)
         document.status = DocumentStatus.ready()
         return document
 
     monkeypatch.setattr(IndexingPipelineService, "index", fake_index)
 
-    async def mock_get_llm(session):
-        return MagicMock()
-
-    _, indexed, failed = await pipeline.index_batch_parallel(
-        docs, mock_get_llm, max_concurrency=2
-    )
+    _, indexed, failed = await pipeline.index_batch_parallel(docs, max_concurrency=2)
 
     assert indexed == 3
     assert failed == 0
@@ -224,20 +219,15 @@ async def test_batch_parallel_one_failure_does_not_affect_others(
         _mock_session_factory(orm_by_id),
     )
 
-    async def failing_index(self, document, connector_doc, llm):
+    async def failing_index(self, document, connector_doc):
         if document.id == 2:
-            raise RuntimeError("LLM exploded")
+            raise RuntimeError("Indexing exploded")
         document.status = DocumentStatus.ready()
         return document
 
     monkeypatch.setattr(IndexingPipelineService, "index", failing_index)
 
-    async def mock_get_llm(session):
-        return MagicMock()
-
-    _, indexed, failed = await pipeline.index_batch_parallel(
-        docs, mock_get_llm, max_concurrency=4
-    )
+    _, indexed, failed = await pipeline.index_batch_parallel(docs, max_concurrency=4)
 
     assert indexed == 2
     assert failed == 1
diff --git a/surfsense_evals/README.md b/surfsense_evals/README.md
index c6314af80..c755c4de6 100644
--- a/surfsense_evals/README.md
+++ b/surfsense_evals/README.md
@@ -137,15 +137,14 @@ Notes:
 - `--skip-unanswerable` (run) — drop unanswerable questions
 - `--docs <a.pdf>,<b.pdf>` (run) — scope to specific docs
 
-## Ingestion knobs (vision LLM, processing mode, summarize)
+## Ingestion knobs (vision LLM, processing mode)
 
-The harness exposes `POST /api/v1/documents/fileupload`'s three knobs on every `ingest` subcommand:
+The harness exposes `POST /api/v1/documents/fileupload`'s ingest knobs on every `ingest` subcommand:
 
 | Flag pair                                  | Effect                                                                                  |
 |--------------------------------------------|-----------------------------------------------------------------------------------------|
 | `--use-vision-llm` / `--no-vision-llm`     | Walk every embedded image in the PDF and inline image-derived text at the image's position (see below). |
 | `--processing-mode {basic,premium}`        | `premium` carries a 10× page multiplier and routes to a stronger ETL (e.g. LlamaCloud). |
-| `--should-summarize` / `--no-summarize`    | Generate a per-document summary at ingest.                                              |
 
 The "Default ingest" column in the benchmarks table is what runs if you don't pass any flag. Whatever was actually used is recorded as a `__settings__` header in the doc map (`data/<suite>/maps/<benchmark>_*_map.jsonl`) and as `extra.ingest_settings` in `run_artifact.json`, then surfaced in the report — no need to hunt through CLI history.
 
diff --git a/surfsense_evals/src/surfsense_evals/core/ingest_settings.py b/surfsense_evals/src/surfsense_evals/core/ingest_settings.py
index 6c27abcd5..8328e0d46 100644
--- a/surfsense_evals/src/surfsense_evals/core/ingest_settings.py
+++ b/surfsense_evals/src/surfsense_evals/core/ingest_settings.py
@@ -173,14 +173,14 @@ def add_ingest_settings_args(
     *,
     defaults: IngestSettings,
 ) -> None:
-    """Attach the three ingest-settings flag pairs to ``parser``.
+    """Attach ingest-settings flags to ``parser``.
 
-    Each bool exposes a mutually exclusive ``--foo`` / ``--no-foo``
-    pair so an operator can flip either direction without restating
-    every flag. Default is ``None`` so that "operator didn't pass the
-    flag" is distinguishable from "operator explicitly passed false"
-    — ``IngestSettings.merge`` then folds in the benchmark default
-    only when the operator was silent.
+    The vision setting exposes a mutually exclusive ``--use-vision-llm`` /
+    ``--no-vision-llm`` pair so an operator can flip either direction without
+    restating every flag. Default is ``None`` so that "operator didn't pass
+    the flag" is distinguishable from "operator explicitly passed false" —
+    ``IngestSettings.merge`` then folds in the benchmark default only when
+    the operator was silent.
     """
 
     settings_group = parser.add_argument_group(
@@ -276,7 +276,7 @@ def format_ingest_settings_md(settings: Any) -> str:
     mode = settings.get("processing_mode") or "basic"
     return (
         f"- SurfSense ingest settings: vision_llm=`{vision}`, "
-        f"processing_mode=`{mode}`, summarize=`{summarize}`"
+        f"processing_mode=`{mode}`"
     )
 
 
diff --git a/surfsense_evals/tests/core/test_ingest_settings.py b/surfsense_evals/tests/core/test_ingest_settings.py
index afbfc709d..fd7e7818a 100644
--- a/surfsense_evals/tests/core/test_ingest_settings.py
+++ b/surfsense_evals/tests/core/test_ingest_settings.py
@@ -4,7 +4,7 @@ Covers:
 
 * ``IngestSettings.merge`` honours operator overrides and falls back
   to per-benchmark defaults when the operator is silent.
-* ``add_ingest_settings_args`` exposes the three flag pairs and
+* ``add_ingest_settings_args`` exposes the ingest-settings flags, and
   argparse defaults of ``None`` correctly distinguish "not passed"
   from "explicitly false".
 * ``settings_header_line`` / ``read_settings_header`` round-trip
@@ -116,12 +116,11 @@ class TestMerge:
         assert d == {
             "use_vision_llm": True,
             "processing_mode": "premium",
-            "use_vision_llm": False,
         }
 
     def test_render_label_format(self) -> None:
         s = IngestSettings(use_vision_llm=True, processing_mode="premium")
-        assert s.render_label() == "vision=on, mode=premium, summarize=on"
+        assert s.render_label() == "vision=on, mode=premium"
 
 
 # ---------------------------------------------------------------------------
@@ -145,7 +144,6 @@ class TestAddArgs:
         args = parser.parse_args([])
         assert args.use_vision_llm is None
         assert args.processing_mode is None
-        assert args.use_vision_llm is None
 
     def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None:
         args = parser.parse_args(["--use-vision-llm"])
@@ -166,12 +164,6 @@ class TestAddArgs:
         with pytest.raises(SystemExit):
             parser.parse_args(["--processing-mode", "exotic"])
 
-    def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None:
-        on = parser.parse_args(["--should-summarize"])
-        assert on.use_vision_llm is True
-        off = parser.parse_args(["--no-summarize"])
-        assert off.use_vision_llm is False
-
     def test_vision_flags_mutually_exclusive(
         self, parser: argparse.ArgumentParser
     ) -> None:
@@ -249,19 +241,17 @@ class TestHeader:
 class TestFormatMd:
     def test_full_settings(self) -> None:
         out = format_ingest_settings_md(
-            {"use_vision_llm": True, "processing_mode": "premium", "use_vision_llm": True}
+            {"use_vision_llm": True, "processing_mode": "premium"}
         )
         assert "vision_llm=`on`" in out
         assert "processing_mode=`premium`" in out
-        assert "summarize=`on`" in out
 
     def test_default_off(self) -> None:
         out = format_ingest_settings_md(
-            {"use_vision_llm": False, "processing_mode": "basic", "use_vision_llm": False}
+            {"use_vision_llm": False, "processing_mode": "basic"}
         )
         assert "vision_llm=`off`" in out
         assert "processing_mode=`basic`" in out
-        assert "summarize=`off`" in out
 
     def test_missing_returns_re_ingest_hint(self) -> None:
         # Empty dict + None + non-mapping should all degrade gracefully.
diff --git a/surfsense_web/components/settings/agent-model-manager.tsx b/surfsense_web/components/settings/agent-model-manager.tsx
index b0e13d3d7..507a263e0 100644
--- a/surfsense_web/components/settings/agent-model-manager.tsx
+++ b/surfsense_web/components/settings/agent-model-manager.tsx
@@ -228,7 +228,7 @@ export function AgentModelManager({ searchSpaceId }: AgentModelManagerProps) {
 									<h3 className="text-sm md:text-base font-semibold mb-2">No Models Yet</h3>
 									<p className="text-[11px] md:text-xs text-muted-foreground max-w-sm mb-4">
 										{canCreate
-											? "Add your first model to power document summarization, chat, and other agent capabilities"
+											? "Add your first model to power chat, reports, and other agent capabilities"
 											: "No models have been added to this space yet. Contact a space owner to add one"}
 									</p>
 								</CardContent>