mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
feat(tests): Update tests for summary-free indexing
This commit is contained in:
parent
dc6a17930b
commit
ddfe60c2f0
26 changed files with 123 additions and 294 deletions
|
|
@ -101,7 +101,7 @@ async def test_generate_resume_defaults_to_one_page_target(monkeypatch) -> None:
|
|||
llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=_llm_invoke))
|
||||
monkeypatch.setattr(
|
||||
resume_tool,
|
||||
"get_document_summary_llm",
|
||||
"get_agent_llm",
|
||||
AsyncMock(return_value=llm),
|
||||
)
|
||||
monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf")
|
||||
|
|
@ -130,7 +130,7 @@ async def test_generate_resume_compresses_when_over_limit(monkeypatch) -> None:
|
|||
llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=responses))
|
||||
monkeypatch.setattr(
|
||||
resume_tool,
|
||||
"get_document_summary_llm",
|
||||
"get_agent_llm",
|
||||
AsyncMock(return_value=llm),
|
||||
)
|
||||
monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf")
|
||||
|
|
@ -165,7 +165,7 @@ async def test_generate_resume_returns_ready_when_target_not_met(monkeypatch) ->
|
|||
llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=responses))
|
||||
monkeypatch.setattr(
|
||||
resume_tool,
|
||||
"get_document_summary_llm",
|
||||
"get_agent_llm",
|
||||
AsyncMock(return_value=llm),
|
||||
)
|
||||
monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf")
|
||||
|
|
@ -198,7 +198,7 @@ async def test_generate_resume_fails_when_hard_limit_exceeded(monkeypatch) -> No
|
|||
llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=responses))
|
||||
monkeypatch.setattr(
|
||||
resume_tool,
|
||||
"get_document_summary_llm",
|
||||
"get_agent_llm",
|
||||
AsyncMock(return_value=llm),
|
||||
)
|
||||
monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf")
|
||||
|
|
|
|||
|
|
@ -71,7 +71,6 @@ async def test_build_connector_doc_produces_correct_fields():
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert doc.title == "Engineering Handbook"
|
||||
|
|
@ -81,7 +80,6 @@ async def test_build_connector_doc_produces_correct_fields():
|
|||
assert doc.search_space_id == _SEARCH_SPACE_ID
|
||||
assert doc.connector_id == _CONNECTOR_ID
|
||||
assert doc.created_by_id == _USER_ID
|
||||
assert doc.should_summarize is True
|
||||
assert doc.metadata["page_id"] == "abc-123"
|
||||
assert doc.metadata["page_title"] == "Engineering Handbook"
|
||||
assert doc.metadata["space_id"] == "ENG"
|
||||
|
|
@ -89,9 +87,8 @@ async def test_build_connector_doc_produces_correct_fields():
|
|||
assert doc.metadata["connector_id"] == _CONNECTOR_ID
|
||||
assert doc.metadata["document_type"] == "Confluence Page"
|
||||
assert doc.metadata["connector_type"] == "Confluence"
|
||||
assert doc.fallback_summary is not None
|
||||
assert "Engineering Handbook" in doc.fallback_summary
|
||||
assert markdown in doc.fallback_summary
|
||||
assert "Engineering Handbook" in doc.deterministic_preview
|
||||
assert markdown in doc.deterministic_preview
|
||||
|
||||
|
||||
async def test_build_connector_doc_summary_disabled():
|
||||
|
|
@ -101,9 +98,7 @@ async def test_build_connector_doc_summary_disabled():
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=False,
|
||||
)
|
||||
assert doc.should_summarize is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -111,10 +106,9 @@ async def test_build_connector_doc_summary_disabled():
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _mock_connector(enable_summary: bool = True):
|
||||
def _mock_connector():
|
||||
c = MagicMock()
|
||||
c.config = {"access_token": "tok"}
|
||||
c.enable_summary = enable_summary
|
||||
c.last_indexed_at = None
|
||||
return c
|
||||
|
||||
|
|
|
|||
|
|
@ -71,7 +71,6 @@ async def test_single_file_returns_one_connector_document(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 1
|
||||
|
|
@ -97,7 +96,6 @@ async def test_multiple_files_all_produce_documents(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 3
|
||||
|
|
@ -125,7 +123,6 @@ async def test_one_download_exception_does_not_block_others(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 2
|
||||
|
|
@ -152,7 +149,6 @@ async def test_etl_error_counts_as_download_failure(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 1
|
||||
|
|
@ -191,7 +187,6 @@ async def test_concurrency_bounded_by_semaphore(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
max_concurrency=2,
|
||||
)
|
||||
|
||||
|
|
@ -231,7 +226,6 @@ async def test_heartbeat_fires_during_parallel_downloads(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
on_heartbeat=_on_heartbeat,
|
||||
)
|
||||
|
||||
|
|
@ -324,7 +318,6 @@ async def _run_full_scan(mocks, monkeypatch, page_files, *, max_files=500):
|
|||
mocks["task_logger"],
|
||||
mocks["log_entry"],
|
||||
max_files,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -434,7 +427,6 @@ async def _run_selected(mocks, file_tuples):
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -569,7 +561,6 @@ async def test_delta_sync_deletions_call_remove_document(monkeypatch):
|
|||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert sorted(remove_calls) == ["id:del1", "id:del2"]
|
||||
|
|
@ -608,7 +599,6 @@ async def test_delta_sync_upserts_filtered_and_downloaded(monkeypatch):
|
|||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert indexed == 2
|
||||
|
|
@ -670,7 +660,6 @@ async def test_delta_sync_mix_deletions_and_upserts(monkeypatch):
|
|||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert sorted(remove_calls) == ["id:del1", "id:del2"]
|
||||
|
|
@ -704,7 +693,6 @@ async def test_delta_sync_returns_new_cursor(monkeypatch):
|
|||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert cursor == "brand-new-cursor-xyz"
|
||||
|
|
@ -725,7 +713,7 @@ def orchestrator_mocks(monkeypatch):
|
|||
mock_connector = MagicMock()
|
||||
mock_connector.config = {"_token_encrypted": False}
|
||||
mock_connector.last_indexed_at = None
|
||||
mock_connector.enable_summary = True
|
||||
mock_connector.enable_vision_llm = True
|
||||
|
||||
monkeypatch.setattr(
|
||||
_mod,
|
||||
|
|
|
|||
|
|
@ -66,7 +66,6 @@ async def test_single_file_returns_one_connector_document(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 1
|
||||
|
|
@ -91,7 +90,6 @@ async def test_multiple_files_all_produce_documents(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 3
|
||||
|
|
@ -119,7 +117,6 @@ async def test_one_download_exception_does_not_block_others(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 2
|
||||
|
|
@ -146,7 +143,6 @@ async def test_etl_error_counts_as_download_failure(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 1
|
||||
|
|
@ -186,7 +182,6 @@ async def test_concurrency_bounded_by_semaphore(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
max_concurrency=2,
|
||||
)
|
||||
|
||||
|
|
@ -226,7 +221,6 @@ async def test_heartbeat_fires_during_parallel_downloads(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
on_heartbeat=_on_heartbeat,
|
||||
)
|
||||
|
||||
|
|
@ -302,7 +296,7 @@ def full_scan_mocks(mock_drive_client, monkeypatch):
|
|||
|
||||
monkeypatch.setattr(
|
||||
_mod,
|
||||
"get_user_long_context_llm",
|
||||
"get_agent_llm",
|
||||
AsyncMock(return_value=MagicMock()),
|
||||
)
|
||||
|
||||
|
|
@ -333,7 +327,6 @@ async def _run_full_scan(mocks, *, max_files=500, include_subfolders=False):
|
|||
mocks["log_entry"],
|
||||
max_files,
|
||||
include_subfolders=include_subfolders,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -489,7 +482,7 @@ async def test_delta_sync_removals_serial_rest_parallel(monkeypatch):
|
|||
)
|
||||
monkeypatch.setattr(
|
||||
_mod,
|
||||
"get_user_long_context_llm",
|
||||
"get_agent_llm",
|
||||
AsyncMock(return_value=MagicMock()),
|
||||
)
|
||||
|
||||
|
|
@ -509,7 +502,6 @@ async def test_delta_sync_removals_serial_rest_parallel(monkeypatch):
|
|||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert sorted(remove_calls) == ["del1", "del2", "trash1"]
|
||||
|
|
@ -577,7 +569,6 @@ async def _run_selected(mocks, file_ids):
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -70,7 +70,6 @@ async def test_build_connector_doc_produces_correct_fields():
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert doc.title == "ENG-42: Fix login bug"
|
||||
|
|
@ -80,7 +79,6 @@ async def test_build_connector_doc_produces_correct_fields():
|
|||
assert doc.search_space_id == _SEARCH_SPACE_ID
|
||||
assert doc.connector_id == _CONNECTOR_ID
|
||||
assert doc.created_by_id == _USER_ID
|
||||
assert doc.should_summarize is True
|
||||
assert doc.metadata["issue_id"] == "abc-123"
|
||||
assert doc.metadata["issue_identifier"] == "ENG-42"
|
||||
assert doc.metadata["issue_title"] == "Fix login bug"
|
||||
|
|
@ -90,13 +88,12 @@ async def test_build_connector_doc_produces_correct_fields():
|
|||
assert doc.metadata["connector_id"] == _CONNECTOR_ID
|
||||
assert doc.metadata["document_type"] == "Linear Issue"
|
||||
assert doc.metadata["connector_type"] == "Linear"
|
||||
assert doc.fallback_summary is not None
|
||||
assert "ENG-42" in doc.fallback_summary
|
||||
assert markdown in doc.fallback_summary
|
||||
assert "ENG-42" in doc.deterministic_preview
|
||||
assert markdown in doc.deterministic_preview
|
||||
|
||||
|
||||
async def test_build_connector_doc_summary_disabled():
|
||||
"""When enable_summary is False, should_summarize is False."""
|
||||
"""When enable_vision_llm is False, deterministic_content is False."""
|
||||
doc = _build_connector_doc(
|
||||
_make_issue(),
|
||||
_make_formatted_issue(),
|
||||
|
|
@ -104,21 +101,17 @@ async def test_build_connector_doc_summary_disabled():
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=False,
|
||||
)
|
||||
|
||||
assert doc.should_summarize is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared fixtures for Slices 2-6
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _mock_connector(enable_summary: bool = True):
|
||||
def _mock_connector():
|
||||
c = MagicMock()
|
||||
c.config = {"access_token": "tok"}
|
||||
c.enable_summary = enable_summary
|
||||
c.last_indexed_at = None
|
||||
return c
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ async def test_build_connector_doc_produces_correct_fields():
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert doc.title == "My Notion Page"
|
||||
|
|
@ -51,40 +50,34 @@ async def test_build_connector_doc_produces_correct_fields():
|
|||
assert doc.search_space_id == _SEARCH_SPACE_ID
|
||||
assert doc.connector_id == _CONNECTOR_ID
|
||||
assert doc.created_by_id == _USER_ID
|
||||
assert doc.should_summarize is True
|
||||
assert doc.metadata["page_title"] == "My Notion Page"
|
||||
assert doc.metadata["page_id"] == "abc-123"
|
||||
assert doc.metadata["connector_id"] == _CONNECTOR_ID
|
||||
assert doc.metadata["document_type"] == "Notion Page"
|
||||
assert doc.metadata["connector_type"] == "Notion"
|
||||
assert doc.fallback_summary is not None
|
||||
assert "My Notion Page" in doc.fallback_summary
|
||||
assert markdown in doc.fallback_summary
|
||||
assert "My Notion Page" in doc.deterministic_preview
|
||||
assert markdown in doc.deterministic_preview
|
||||
|
||||
|
||||
async def test_build_connector_doc_summary_disabled():
|
||||
"""When enable_summary is False, should_summarize is False."""
|
||||
"""When enable_vision_llm is False, deterministic_content is False."""
|
||||
doc = _build_connector_doc(
|
||||
_make_page(),
|
||||
"# content",
|
||||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=False,
|
||||
)
|
||||
|
||||
assert doc.should_summarize is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared fixtures for Slices 2-7 (full index_notion_pages tests)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _mock_connector(enable_summary: bool = True):
|
||||
def _mock_connector():
|
||||
c = MagicMock()
|
||||
c.config = {"access_token": "tok"}
|
||||
c.enable_summary = enable_summary
|
||||
c.last_indexed_at = None
|
||||
return c
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,6 @@ async def test_single_file_returns_one_connector_document(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 1
|
||||
|
|
@ -91,7 +90,6 @@ async def test_multiple_files_all_produce_documents(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 3
|
||||
|
|
@ -119,7 +117,6 @@ async def test_one_download_exception_does_not_block_others(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 2
|
||||
|
|
@ -146,7 +143,6 @@ async def test_etl_error_counts_as_download_failure(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert len(docs) == 1
|
||||
|
|
@ -185,7 +181,6 @@ async def test_concurrency_bounded_by_semaphore(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
max_concurrency=2,
|
||||
)
|
||||
|
||||
|
|
@ -225,7 +220,6 @@ async def test_heartbeat_fires_during_parallel_downloads(
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
on_heartbeat=_on_heartbeat,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -180,7 +180,6 @@ async def _run_gdrive_selected(mocks, file_ids):
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -337,7 +336,7 @@ def gdrive_full_scan_mocks(monkeypatch):
|
|||
_mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
_mod, "get_user_long_context_llm", AsyncMock(return_value=MagicMock())
|
||||
_mod, "get_agent_llm", AsyncMock(return_value=MagicMock())
|
||||
)
|
||||
|
||||
return {
|
||||
|
|
@ -366,7 +365,6 @@ async def _run_gdrive_full_scan(mocks, max_files=500):
|
|||
MagicMock(),
|
||||
max_files,
|
||||
include_subfolders=False,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -455,7 +453,7 @@ async def test_gdrive_delta_sync_skips_over_quota(monkeypatch):
|
|||
_mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
_mod, "get_user_long_context_llm", AsyncMock(return_value=MagicMock())
|
||||
_mod, "get_agent_llm", AsyncMock(return_value=MagicMock())
|
||||
)
|
||||
|
||||
mock_task_logger = MagicMock()
|
||||
|
|
@ -473,7 +471,6 @@ async def test_gdrive_delta_sync_skips_over_quota(monkeypatch):
|
|||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
call_files = download_mock.call_args[0][1]
|
||||
|
|
@ -539,7 +536,6 @@ async def _run_onedrive_selected(mocks, file_ids):
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -641,7 +637,6 @@ async def _run_dropbox_selected(mocks, file_paths):
|
|||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ def test_valid_document_created_with_required_fields():
|
|||
connector_id=42,
|
||||
created_by_id="00000000-0000-0000-0000-000000000001",
|
||||
)
|
||||
assert doc.should_summarize is True
|
||||
assert doc.should_use_code_chunker is False
|
||||
assert doc.metadata == {}
|
||||
assert doc.connector_id == 42
|
||||
|
|
|
|||
|
|
@ -1,41 +0,0 @@
|
|||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from app.indexing_pipeline.document_summarizer import summarize_document
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("patched_summarizer_chain")
|
||||
async def test_without_metadata_returns_raw_summary():
|
||||
"""Summarizer returns the LLM output directly when no metadata is provided."""
|
||||
result = await summarize_document("# Content", llm=MagicMock(model="gpt-4"))
|
||||
|
||||
assert result == "The summary."
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("patched_summarizer_chain")
|
||||
async def test_with_metadata_includes_metadata_values_in_output():
|
||||
"""Non-empty metadata values are prepended to the summary output."""
|
||||
result = await summarize_document(
|
||||
"# Content",
|
||||
llm=MagicMock(model="gpt-4"),
|
||||
metadata={"author": "Alice", "source": "Notion"},
|
||||
)
|
||||
|
||||
assert "Alice" in result
|
||||
assert "Notion" in result
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("patched_summarizer_chain")
|
||||
async def test_with_metadata_omits_empty_fields_from_output():
|
||||
"""Empty metadata fields are omitted from the summary output."""
|
||||
result = await summarize_document(
|
||||
"# Content",
|
||||
llm=MagicMock(model="gpt-4"),
|
||||
metadata={"author": "Alice", "description": ""},
|
||||
)
|
||||
|
||||
assert "Alice" in result
|
||||
assert "description" not in result.lower()
|
||||
|
|
@ -51,11 +51,6 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
|||
return await original_to_thread(func, *args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(asyncio, "to_thread", tracking_to_thread)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
||||
AsyncMock(return_value="Summary."),
|
||||
)
|
||||
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
|
||||
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
|
||||
monkeypatch.setattr(
|
||||
|
|
@ -85,7 +80,7 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
|||
document.id = 1
|
||||
document.status = DocumentStatus.pending()
|
||||
|
||||
await pipeline.index(document, connector_doc, llm=MagicMock())
|
||||
await pipeline.index(document, connector_doc)
|
||||
|
||||
# Either chunker entry point satisfies the "chunking runs off the event
|
||||
# loop" contract this test guards. Routing between the two is verified
|
||||
|
|
@ -104,10 +99,6 @@ async def test_non_code_documents_use_hybrid_chunker(
|
|||
mid-row. Only documents flagged with ``should_use_code_chunker=True``
|
||||
should take the ``chunk_text`` path.
|
||||
"""
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
||||
AsyncMock(return_value="Summary."),
|
||||
)
|
||||
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
|
||||
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
|
||||
monkeypatch.setattr(
|
||||
|
|
@ -139,7 +130,7 @@ async def test_non_code_documents_use_hybrid_chunker(
|
|||
document.id = 1
|
||||
document.status = DocumentStatus.pending()
|
||||
|
||||
await pipeline.index(document, connector_doc, llm=MagicMock())
|
||||
await pipeline.index(document, connector_doc)
|
||||
|
||||
mock_chunk_hybrid.assert_called_once()
|
||||
mock_chunk_code.assert_not_called()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue