feat(tests): Update tests for summary-free indexing

2026-07-22 23:31:12 +02:00 · 2026-06-04 00:53:51 +05:30 · 2026-06-04 00:53:51 +05:30 · ddfe60c2f0
commit ddfe60c2f0
parent dc6a17930b
26 changed files with 123 additions and 294 deletions
--- a/surfsense_backend/tests/integration/conftest.py
+++ b/surfsense_backend/tests/integration/conftest.py
@ -126,20 +126,12 @@ async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpac
@pytest.fixture
 def patched_summarize(monkeypatch) -> AsyncMock:
    mock = AsyncMock(return_value="Mocked summary.")
-    monkeypatch.setattr(
-        "app.indexing_pipeline.indexing_pipeline_service.summarize_document",
-        mock,
-    )
    return mock


@pytest.fixture
 def patched_summarize_raises(monkeypatch) -> AsyncMock:
    mock = AsyncMock(side_effect=RuntimeError("LLM unavailable"))
-    monkeypatch.setattr(
-        "app.indexing_pipeline.indexing_pipeline_service.summarize_document",
-        mock,
-    )
    return mock


--- a/surfsense_backend/tests/integration/document_upload/conftest.py
+++ b/surfsense_backend/tests/integration/document_upload/conftest.py
@ -68,7 +68,6 @@ class InlineTaskDispatcher:
        filename: str,
        search_space_id: int,
        user_id: str,
-        should_summarize: bool = False,
        use_vision_llm: bool = False,
        processing_mode: str = "basic",
    ) -> None:
@ -83,7 +82,6 @@ class InlineTaskDispatcher:
                filename,
                search_space_id,
                user_id,
-                should_summarize=should_summarize,
                use_vision_llm=use_vision_llm,
                processing_mode=processing_mode,
            )
@ -266,10 +264,6 @@ async def page_limits():
@pytest.fixture(autouse=True)
 def _mock_external_apis(monkeypatch):
    """Mock LLM, embedding, and chunking — these are external API boundaries."""
-    monkeypatch.setattr(
-        "app.indexing_pipeline.indexing_pipeline_service.summarize_document",
-        AsyncMock(return_value="Mocked summary."),
-    )
    monkeypatch.setattr(
        "app.indexing_pipeline.indexing_pipeline_service.embed_texts",
        MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]),
--- a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
@ -8,7 +8,7 @@ pytestmark = pytest.mark.integration


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
    """Document status is READY after successful indexing."""
@ -19,7 +19,6 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
        etl_service="UNSTRUCTURED",
        search_space_id=db_search_space.id,
        user_id=str(db_user.id),
-        llm=mocker.Mock(),
    )

    result = await db_session.execute(
@ -31,7 +30,7 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
    """Document content is set to the LLM-generated summary."""
@ -42,8 +41,6 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
        etl_service="UNSTRUCTURED",
        search_space_id=db_search_space.id,
        user_id=str(db_user.id),
-        llm=mocker.Mock(),
-        should_summarize=True,
    )

    result = await db_session.execute(
@ -55,7 +52,7 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker):


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker):
    """Chunks derived from the source markdown are persisted in the DB."""
@ -66,7 +63,6 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker
        etl_service="UNSTRUCTURED",
        search_space_id=db_search_space.id,
        user_id=str(db_user.id),
-        llm=mocker.Mock(),
    )

    result = await db_session.execute(
@ -96,9 +92,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
            etl_service="UNSTRUCTURED",
            search_space_id=db_search_space.id,
            user_id=str(db_user.id),
-            llm=mocker.Mock(),
-            should_summarize=True,
-        )
+        )


 # ---------------------------------------------------------------------------
@ -107,7 +101,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker):
    """Document content is updated to the new summary after reindexing."""
@ -118,7 +112,6 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc
        etl_service="UNSTRUCTURED",
        search_space_id=db_search_space.id,
        user_id=str(db_user.id),
-        llm=mocker.Mock(),
    )

    result = await db_session.execute(
@ -129,14 +122,14 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc
    document.source_markdown = "## Edited\n\nNew content after user edit."
    await db_session.flush()

-    await adapter.reindex(document=document, llm=mocker.Mock())
+    await adapter.reindex(document=document)

    await db_session.refresh(document)
    assert document.content == "Mocked summary."


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_reindex_updates_content_hash(
    db_session, db_search_space, db_user, mocker
@ -149,7 +142,6 @@ async def test_reindex_updates_content_hash(
        etl_service="UNSTRUCTURED",
        search_space_id=db_search_space.id,
        user_id=str(db_user.id),
-        llm=mocker.Mock(),
    )

    result = await db_session.execute(
@ -161,14 +153,14 @@ async def test_reindex_updates_content_hash(
    document.source_markdown = "## Edited\n\nNew content after user edit."
    await db_session.flush()

-    await adapter.reindex(document=document, llm=mocker.Mock())
+    await adapter.reindex(document=document)

    await db_session.refresh(document)
    assert document.content_hash != original_hash


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, mocker):
    """Document status is READY after successful reindexing."""
@ -179,7 +171,6 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
        etl_service="UNSTRUCTURED",
        search_space_id=db_search_space.id,
        user_id=str(db_user.id),
-        llm=mocker.Mock(),
    )

    result = await db_session.execute(
@ -190,13 +181,13 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
    document.source_markdown = "## Edited\n\nNew content after user edit."
    await db_session.flush()

-    await adapter.reindex(document=document, llm=mocker.Mock())
+    await adapter.reindex(document=document)

    await db_session.refresh(document)
    assert DocumentStatus.is_state(document.status, DocumentStatus.READY)


-@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts")
+@pytest.mark.usefixtures("patched_embed_texts")
 async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker):
    """Reindexing replaces old chunks with new content rather than appending."""
    mocker.patch(
@ -211,7 +202,6 @@ async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, moc
        etl_service="UNSTRUCTURED",
        search_space_id=db_search_space.id,
        user_id=str(db_user.id),
-        llm=mocker.Mock(),
    )

    result = await db_session.execute(
@ -223,7 +213,7 @@ async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, moc
    document.source_markdown = "## Edited\n\nNew content after user edit."
    await db_session.flush()

-    await adapter.reindex(document=document, llm=mocker.Mock())
+    await adapter.reindex(document=document)

    chunks_result = await db_session.execute(
        select(Chunk).filter(Chunk.document_id == document_id)
@ -235,7 +225,7 @@ async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, moc


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_reindex_clears_reindexing_flag(
    db_session, db_search_space, db_user, mocker
@ -248,7 +238,6 @@ async def test_reindex_clears_reindexing_flag(
        etl_service="UNSTRUCTURED",
        search_space_id=db_search_space.id,
        user_id=str(db_user.id),
-        llm=mocker.Mock(),
    )

    result = await db_session.execute(
@ -260,7 +249,7 @@ async def test_reindex_clears_reindexing_flag(
    document.content_needs_reindexing = True
    await db_session.flush()

-    await adapter.reindex(document=document, llm=mocker.Mock())
+    await adapter.reindex(document=document)

    await db_session.refresh(document)
    assert document.content_needs_reindexing is False
@ -269,10 +258,6 @@ async def test_reindex_clears_reindexing_flag(
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
 async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, mocker):
    """RuntimeError is raised when reindexing fails so the caller can handle it."""
-    mocker.patch(
-        "app.indexing_pipeline.indexing_pipeline_service.summarize_document",
-        return_value="Mocked summary.",
-    )

    adapter = UploadDocumentAdapter(db_session)
    await adapter.index(
@ -281,7 +266,6 @@ async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, m
        etl_service="UNSTRUCTURED",
        search_space_id=db_search_space.id,
        user_id=str(db_user.id),
-        llm=mocker.Mock(),
    )

    result = await db_session.execute(
@ -292,13 +276,8 @@ async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, m
    document.source_markdown = "## Edited\n\nNew content after user edit."
    await db_session.flush()

-    mocker.patch(
-        "app.indexing_pipeline.indexing_pipeline_service.summarize_document",
-        side_effect=RuntimeError("LLM unavailable"),
-    )
-
    with pytest.raises(RuntimeError, match=r"Embedding failed|Reindexing failed"):
-        await adapter.reindex(document=document, llm=mocker.Mock())
+        await adapter.reindex(document=document)


 async def test_reindex_raises_on_empty_source_markdown(
@ -323,4 +302,4 @@ async def test_reindex_raises_on_empty_source_markdown(
    adapter = UploadDocumentAdapter(db_session)

    with pytest.raises(RuntimeError, match="no source_markdown"):
-        await adapter.reindex(document=document, llm=mocker.Mock())
+        await adapter.reindex(document=document)
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_calendar_pipeline.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_calendar_pipeline.py
@ -25,8 +25,6 @@ def _cal_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=True,
-        fallback_summary=f"Calendar: Event {unique_id}",
        metadata={
            "event_id": unique_id,
            "start_time": "2025-01-15T10:00:00",
@ -37,7 +35,7 @@ def _cal_doc(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_calendar_pipeline_creates_ready_document(
    db_session, db_search_space, db_connector, db_user, mocker
@ -55,7 +53,7 @@ async def test_calendar_pipeline_creates_ready_document(
    prepared = await service.prepare_for_indexing([doc])
    assert len(prepared) == 1

-    await service.index(prepared[0], doc, llm=mocker.Mock())
+    await service.index(prepared[0], doc)

    result = await db_session.execute(
        select(Document).filter(Document.search_space_id == space_id)
@ -68,7 +66,7 @@ async def test_calendar_pipeline_creates_ready_document(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_calendar_legacy_doc_migrated(
    db_session, db_search_space, db_connector, db_user, mocker
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py
@ -25,8 +25,6 @@ def _drive_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=True,
-        fallback_summary=f"File: {unique_id}.pdf",
        metadata={
            "google_drive_file_id": unique_id,
            "google_drive_file_name": f"{unique_id}.pdf",
@ -36,7 +34,7 @@ def _drive_doc(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_drive_pipeline_creates_ready_document(
    db_session, db_search_space, db_connector, db_user, mocker
@ -54,7 +52,7 @@ async def test_drive_pipeline_creates_ready_document(
    prepared = await service.prepare_for_indexing([doc])
    assert len(prepared) == 1

-    await service.index(prepared[0], doc, llm=mocker.Mock())
+    await service.index(prepared[0], doc)

    result = await db_session.execute(
        select(Document).filter(Document.search_space_id == space_id)
@ -67,7 +65,7 @@ async def test_drive_pipeline_creates_ready_document(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_drive_legacy_doc_migrated(
    db_session, db_search_space, db_connector, db_user, mocker
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_dropbox_pipeline.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_dropbox_pipeline.py
@ -24,8 +24,6 @@ def _dropbox_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=True,
-        fallback_summary=f"File: {unique_id}.docx",
        metadata={
            "dropbox_file_id": unique_id,
            "dropbox_file_name": f"{unique_id}.docx",
@ -35,7 +33,7 @@ def _dropbox_doc(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_dropbox_pipeline_creates_ready_document(
    db_session, db_search_space, db_connector, db_user, mocker
@ -53,7 +51,7 @@ async def test_dropbox_pipeline_creates_ready_document(
    prepared = await service.prepare_for_indexing([doc])
    assert len(prepared) == 1

-    await service.index(prepared[0], doc, llm=mocker.Mock())
+    await service.index(prepared[0], doc)

    result = await db_session.execute(
        select(Document).filter(Document.search_space_id == space_id)
@ -66,7 +64,7 @@ async def test_dropbox_pipeline_creates_ready_document(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_dropbox_duplicate_content_skipped(
    db_session, db_search_space, db_connector, db_user, mocker
@ -86,7 +84,7 @@ async def test_dropbox_duplicate_content_skipped(

    prepared = await service.prepare_for_indexing([doc])
    assert len(prepared) == 1
-    await service.index(prepared[0], doc, llm=mocker.Mock())
+    await service.index(prepared[0], doc)

    result = await db_session.execute(
        select(Document).filter(Document.search_space_id == space_id)
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_gmail_pipeline.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_gmail_pipeline.py
@ -28,8 +28,6 @@ def _gmail_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=True,
-        fallback_summary=f"Gmail: Subject for {unique_id}",
        metadata={
            "message_id": unique_id,
            "from": "sender@example.com",
@ -39,7 +37,7 @@ def _gmail_doc(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_gmail_pipeline_creates_ready_document(
    db_session, db_search_space, db_connector, db_user, mocker
@ -57,7 +55,7 @@ async def test_gmail_pipeline_creates_ready_document(
    prepared = await service.prepare_for_indexing([doc])
    assert len(prepared) == 1

-    await service.index(prepared[0], doc, llm=mocker.Mock())
+    await service.index(prepared[0], doc)

    result = await db_session.execute(
        select(Document).filter(Document.search_space_id == space_id)
@ -71,7 +69,7 @@ async def test_gmail_pipeline_creates_ready_document(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_gmail_legacy_doc_migrated_then_reused(
    db_session, db_search_space, db_connector, db_user, mocker
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_index_batch.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_index_batch.py
@ -10,7 +10,7 @@ pytestmark = pytest.mark.integration


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_index_batch_creates_ready_documents(
    db_session, db_search_space, make_connector_document, mocker
@ -33,7 +33,7 @@ async def test_index_batch_creates_ready_documents(
    ]

    service = IndexingPipelineService(session=db_session)
-    results = await service.index_batch(docs, llm=mocker.Mock())
+    results = await service.index_batch(docs)

    assert len(results) == 2

@ -50,10 +50,10 @@ async def test_index_batch_creates_ready_documents(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_index_batch_empty_returns_empty(db_session, mocker):
    """index_batch with empty input returns an empty list."""
    service = IndexingPipelineService(session=db_session)
-    results = await service.index_batch([], llm=mocker.Mock())
+    results = await service.index_batch([])
    assert results == []
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py
@ -10,9 +10,7 @@ _EMBEDDING_DIM = app_config.embedding_model_instance.dimension
 pytestmark = pytest.mark.integration


-@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
-)
+@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
 async def test_sets_status_ready(
    db_session,
    db_search_space,
@ -27,7 +25,7 @@ async def test_sets_status_ready(
    document = prepared[0]
    document_id = document.id

-    await service.index(document, connector_doc, llm=mocker.Mock())
+    await service.index(document, connector_doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)
@ -37,16 +35,14 @@ async def test_sets_status_ready(
    assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY)


-@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
-)
-async def test_content_is_summary_when_should_summarize_true(
+@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
+async def test_content_is_source_markdown_by_default(
    db_session,
    db_search_space,
    make_connector_document,
    mocker,
 ):
-    """Document content is set to the LLM-generated summary when should_summarize=True."""
+    """Document content is set to source_markdown by default."""
    connector_doc = make_connector_document(search_space_id=db_search_space.id)
    service = IndexingPipelineService(session=db_session)

@ -54,28 +50,25 @@ async def test_content_is_summary_when_should_summarize_true(
    document = prepared[0]
    document_id = document.id

-    await service.index(document, connector_doc, llm=mocker.Mock())
+    await service.index(document, connector_doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)
    )
    reloaded = result.scalars().first()

-    assert reloaded.content == "Mocked summary."
+    assert reloaded.content == connector_doc.source_markdown


-@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
-)
-async def test_content_is_source_markdown_when_should_summarize_false(
+@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
+async def test_content_is_source_markdown_when_custom_content(
    db_session,
    db_search_space,
    make_connector_document,
 ):
-    """Document content is set to source_markdown verbatim when should_summarize=False."""
+    """Document content is set to source_markdown verbatim."""
    connector_doc = make_connector_document(
        search_space_id=db_search_space.id,
-        should_summarize=False,
        source_markdown="## Raw content",
    )
    service = IndexingPipelineService(session=db_session)
@ -84,7 +77,7 @@ async def test_content_is_source_markdown_when_should_summarize_false(
    document = prepared[0]
    document_id = document.id

-    await service.index(document, connector_doc, llm=None)
+    await service.index(document, connector_doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)
@ -94,9 +87,7 @@ async def test_content_is_source_markdown_when_should_summarize_false(
    assert reloaded.content == "## Raw content"


-@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
-)
+@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
 async def test_chunks_written_to_db(
    db_session,
    db_search_space,
@ -111,7 +102,7 @@ async def test_chunks_written_to_db(
    document = prepared[0]
    document_id = document.id

-    await service.index(document, connector_doc, llm=mocker.Mock())
+    await service.index(document, connector_doc)

    result = await db_session.execute(
        select(Chunk).filter(Chunk.document_id == document_id)
@ -122,9 +113,7 @@ async def test_chunks_written_to_db(
    assert chunks[0].content == "Test chunk content."


-@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
-)
+@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
 async def test_embedding_written_to_db(
    db_session,
    db_search_space,
@ -139,7 +128,7 @@ async def test_embedding_written_to_db(
    document = prepared[0]
    document_id = document.id

-    await service.index(document, connector_doc, llm=mocker.Mock())
+    await service.index(document, connector_doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)
@ -150,9 +139,7 @@ async def test_embedding_written_to_db(
    assert len(reloaded.embedding) == _EMBEDDING_DIM


-@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
-)
+@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
 async def test_updated_at_advances_after_indexing(
    db_session,
    db_search_space,
@ -172,7 +159,7 @@ async def test_updated_at_advances_after_indexing(
    )
    updated_at_pending = result.scalars().first().updated_at

-    await service.index(document, connector_doc, llm=mocker.Mock())
+    await service.index(document, connector_doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)
@ -182,18 +169,15 @@ async def test_updated_at_advances_after_indexing(
    assert updated_at_ready > updated_at_pending


-@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
-)
+@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
 async def test_no_llm_falls_back_to_source_markdown(
    db_session,
    db_search_space,
    make_connector_document,
 ):
-    """When llm=None and no fallback_summary, content falls back to source_markdown."""
+    """Content stays deterministic source markdown without an LLM."""
    connector_doc = make_connector_document(
        search_space_id=db_search_space.id,
-        should_summarize=True,
        source_markdown="## Fallback content",
    )
    service = IndexingPipelineService(session=db_session)
@ -202,7 +186,7 @@ async def test_no_llm_falls_back_to_source_markdown(
    document = prepared[0]
    document_id = document.id

-    await service.index(document, connector_doc, llm=None)
+    await service.index(document, connector_doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)
@ -213,27 +197,23 @@ async def test_no_llm_falls_back_to_source_markdown(
    assert reloaded.content == "## Fallback content"


-@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
-)
-async def test_fallback_summary_used_when_llm_unavailable(
+@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
+async def test_source_markdown_used_without_preview(
    db_session,
    db_search_space,
    make_connector_document,
 ):
-    """fallback_summary is used as content when llm=None and should_summarize=True."""
+    """Source markdown is used without fallback preview fields."""
    connector_doc = make_connector_document(
        search_space_id=db_search_space.id,
-        should_summarize=True,
        source_markdown="## Full raw content",
-        fallback_summary="Short pre-built summary.",
    )
    service = IndexingPipelineService(session=db_session)

    prepared = await service.prepare_for_indexing([connector_doc])
    document_id = prepared[0].id

-    await service.index(prepared[0], connector_doc, llm=None)
+    await service.index(prepared[0], connector_doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)
@ -241,12 +221,10 @@ async def test_fallback_summary_used_when_llm_unavailable(
    reloaded = result.scalars().first()

    assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY)
-    assert reloaded.content == "Short pre-built summary."
+    assert reloaded.content == "## Full raw content"


-@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
-)
+@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
 async def test_reindex_replaces_old_chunks(
    db_session,
    db_search_space,
@ -264,14 +242,14 @@ async def test_reindex_replaces_old_chunks(
    document = prepared[0]
    document_id = document.id

-    await service.index(document, connector_doc, llm=mocker.Mock())
+    await service.index(document, connector_doc)

    updated_doc = make_connector_document(
        search_space_id=db_search_space.id,
        source_markdown="## v2",
    )
    re_prepared = await service.prepare_for_indexing([updated_doc])
-    await service.index(re_prepared[0], updated_doc, llm=mocker.Mock())
+    await service.index(re_prepared[0], updated_doc)

    result = await db_session.execute(
        select(Chunk).filter(Chunk.document_id == document_id)
@ -298,7 +276,7 @@ async def test_llm_error_sets_status_failed(
    document = prepared[0]
    document_id = document.id

-    await service.index(document, connector_doc, llm=mocker.Mock())
+    await service.index(document, connector_doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)
@ -325,7 +303,7 @@ async def test_llm_error_leaves_no_partial_data(
    document = prepared[0]
    document_id = document.id

-    await service.index(document, connector_doc, llm=mocker.Mock())
+    await service.index(document, connector_doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py
@ -21,7 +21,6 @@ from app.db import (
 pytestmark = pytest.mark.integration

 UNIFIED_FIXTURES = (
-    "patched_summarize",
    "patched_embed_texts",
    "patched_chunk_text",
 )
@ -787,7 +786,7 @@ class TestPipelineIntegration:
        assert len(prepared) == 1

        db_doc = prepared[0]
-        result = await service.index(db_doc, doc, llm=mocker.Mock())
+        result = await service.index(db_doc, doc)
        assert result is not None

        docs = (
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_onedrive_pipeline.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_onedrive_pipeline.py
@ -24,8 +24,6 @@ def _onedrive_doc(
        search_space_id=search_space_id,
        connector_id=connector_id,
        created_by_id=user_id,
-        should_summarize=True,
-        fallback_summary=f"File: {unique_id}.docx",
        metadata={
            "onedrive_file_id": unique_id,
            "onedrive_file_name": f"{unique_id}.docx",
@ -35,7 +33,7 @@ def _onedrive_doc(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_onedrive_pipeline_creates_ready_document(
    db_session, db_search_space, db_connector, db_user, mocker
@ -53,7 +51,7 @@ async def test_onedrive_pipeline_creates_ready_document(
    prepared = await service.prepare_for_indexing([doc])
    assert len(prepared) == 1

-    await service.index(prepared[0], doc, llm=mocker.Mock())
+    await service.index(prepared[0], doc)

    result = await db_session.execute(
        select(Document).filter(Document.search_space_id == space_id)
@ -66,7 +64,7 @@ async def test_onedrive_pipeline_creates_ready_document(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_onedrive_duplicate_content_skipped(
    db_session, db_search_space, db_connector, db_user, mocker
@ -86,7 +84,7 @@ async def test_onedrive_duplicate_content_skipped(

    prepared = await service.prepare_for_indexing([doc])
    assert len(prepared) == 1
-    await service.index(prepared[0], doc, llm=mocker.Mock())
+    await service.index(prepared[0], doc)

    result = await db_session.execute(
        select(Document).filter(Document.search_space_id == space_id)
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py
@ -33,7 +33,7 @@ async def test_new_document_is_persisted_with_pending_status(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_unchanged_ready_document_is_skipped(
    db_session,
@ -47,7 +47,7 @@ async def test_unchanged_ready_document_is_skipped(

    # Index fully so the document reaches ready state
    prepared = await service.prepare_for_indexing([doc])
-    await service.index(prepared[0], doc, llm=mocker.Mock())
+    await service.index(prepared[0], doc)

    # Same content on the next run — a ready document must be skipped
    results = await service.prepare_for_indexing([doc])
@ -56,7 +56,7 @@ async def test_unchanged_ready_document_is_skipped(


@pytest.mark.usefixtures(
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text"
+    "patched_embed_texts", "patched_chunk_text"
 )
 async def test_title_only_change_updates_title_in_db(
    db_session,
@ -72,7 +72,7 @@ async def test_title_only_change_updates_title_in_db(

    prepared = await service.prepare_for_indexing([original])
    document_id = prepared[0].id
-    await service.index(prepared[0], original, llm=mocker.Mock())
+    await service.index(prepared[0], original)

    renamed = make_connector_document(
        search_space_id=db_search_space.id, title="Updated Title"
@ -354,7 +354,7 @@ async def test_failed_document_with_unchanged_content_is_requeued(
    # First run: document is created and indexing crashes → status = failed
    prepared = await service.prepare_for_indexing([doc])
    document_id = prepared[0].id
-    await service.index(prepared[0], doc, llm=mocker.Mock())
+    await service.index(prepared[0], doc)

    result = await db_session.execute(
        select(Document).filter(Document.id == document_id)