Merge upstream/dev

This commit is contained in:
CREDO23 2026-06-05 19:18:12 +02:00
commit 8bdfd00a15
191 changed files with 3301 additions and 4079 deletions

View file

@ -142,7 +142,7 @@ async def test_generate_resume_defaults_to_one_page_target(monkeypatch) -> None:
llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=_llm_invoke))
monkeypatch.setattr(
resume_tool,
"get_document_summary_llm",
"get_agent_llm",
AsyncMock(return_value=llm),
)
monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf")
@ -171,7 +171,7 @@ async def test_generate_resume_compresses_when_over_limit(monkeypatch) -> None:
llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=responses))
monkeypatch.setattr(
resume_tool,
"get_document_summary_llm",
"get_agent_llm",
AsyncMock(return_value=llm),
)
monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf")
@ -206,7 +206,7 @@ async def test_generate_resume_returns_ready_when_target_not_met(monkeypatch) ->
llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=responses))
monkeypatch.setattr(
resume_tool,
"get_document_summary_llm",
"get_agent_llm",
AsyncMock(return_value=llm),
)
monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf")
@ -239,7 +239,7 @@ async def test_generate_resume_fails_when_hard_limit_exceeded(monkeypatch) -> No
llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=responses))
monkeypatch.setattr(
resume_tool,
"get_document_summary_llm",
"get_agent_llm",
AsyncMock(return_value=llm),
)
monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf")

View file

@ -71,7 +71,6 @@ async def test_build_connector_doc_produces_correct_fields():
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert doc.title == "Engineering Handbook"
@ -81,7 +80,6 @@ async def test_build_connector_doc_produces_correct_fields():
assert doc.search_space_id == _SEARCH_SPACE_ID
assert doc.connector_id == _CONNECTOR_ID
assert doc.created_by_id == _USER_ID
assert doc.should_summarize is True
assert doc.metadata["page_id"] == "abc-123"
assert doc.metadata["page_title"] == "Engineering Handbook"
assert doc.metadata["space_id"] == "ENG"
@ -89,21 +87,6 @@ async def test_build_connector_doc_produces_correct_fields():
assert doc.metadata["connector_id"] == _CONNECTOR_ID
assert doc.metadata["document_type"] == "Confluence Page"
assert doc.metadata["connector_type"] == "Confluence"
assert doc.fallback_summary is not None
assert "Engineering Handbook" in doc.fallback_summary
assert markdown in doc.fallback_summary
async def test_build_connector_doc_summary_disabled():
doc = _build_connector_doc(
_make_page(),
_to_markdown(_make_page()),
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=False,
)
assert doc.should_summarize is False
# ---------------------------------------------------------------------------
@ -111,10 +94,9 @@ async def test_build_connector_doc_summary_disabled():
# ---------------------------------------------------------------------------
def _mock_connector(enable_summary: bool = True):
def _mock_connector():
c = MagicMock()
c.config = {"access_token": "tok"}
c.enable_summary = enable_summary
c.last_indexed_at = None
return c

View file

@ -71,7 +71,6 @@ async def test_single_file_returns_one_connector_document(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 1
@ -97,7 +96,6 @@ async def test_multiple_files_all_produce_documents(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 3
@ -125,7 +123,6 @@ async def test_one_download_exception_does_not_block_others(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 2
@ -152,7 +149,6 @@ async def test_etl_error_counts_as_download_failure(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 1
@ -191,7 +187,6 @@ async def test_concurrency_bounded_by_semaphore(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
max_concurrency=2,
)
@ -231,7 +226,6 @@ async def test_heartbeat_fires_during_parallel_downloads(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
on_heartbeat=_on_heartbeat,
)
@ -324,7 +318,6 @@ async def _run_full_scan(mocks, monkeypatch, page_files, *, max_files=500):
mocks["task_logger"],
mocks["log_entry"],
max_files,
enable_summary=True,
)
@ -434,7 +427,6 @@ async def _run_selected(mocks, file_tuples):
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
@ -569,7 +561,6 @@ async def test_delta_sync_deletions_call_remove_document(monkeypatch):
mock_task_logger,
MagicMock(),
max_files=500,
enable_summary=True,
)
assert sorted(remove_calls) == ["id:del1", "id:del2"]
@ -608,7 +599,6 @@ async def test_delta_sync_upserts_filtered_and_downloaded(monkeypatch):
mock_task_logger,
MagicMock(),
max_files=500,
enable_summary=True,
)
assert indexed == 2
@ -670,7 +660,6 @@ async def test_delta_sync_mix_deletions_and_upserts(monkeypatch):
mock_task_logger,
MagicMock(),
max_files=500,
enable_summary=True,
)
assert sorted(remove_calls) == ["id:del1", "id:del2"]
@ -704,7 +693,6 @@ async def test_delta_sync_returns_new_cursor(monkeypatch):
mock_task_logger,
MagicMock(),
max_files=500,
enable_summary=True,
)
assert cursor == "brand-new-cursor-xyz"
@ -725,7 +713,7 @@ def orchestrator_mocks(monkeypatch):
mock_connector = MagicMock()
mock_connector.config = {"_token_encrypted": False}
mock_connector.last_indexed_at = None
mock_connector.enable_summary = True
mock_connector.enable_vision_llm = True
monkeypatch.setattr(
_mod,

View file

@ -66,7 +66,6 @@ async def test_single_file_returns_one_connector_document(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 1
@ -91,7 +90,6 @@ async def test_multiple_files_all_produce_documents(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 3
@ -119,7 +117,6 @@ async def test_one_download_exception_does_not_block_others(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 2
@ -146,7 +143,6 @@ async def test_etl_error_counts_as_download_failure(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 1
@ -186,7 +182,6 @@ async def test_concurrency_bounded_by_semaphore(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
max_concurrency=2,
)
@ -226,7 +221,6 @@ async def test_heartbeat_fires_during_parallel_downloads(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
on_heartbeat=_on_heartbeat,
)
@ -300,12 +294,6 @@ def full_scan_mocks(mock_drive_client, monkeypatch):
MagicMock(return_value=pipeline_mock),
)
monkeypatch.setattr(
_mod,
"get_user_long_context_llm",
AsyncMock(return_value=MagicMock()),
)
return {
"drive_client": mock_drive_client,
"session": mock_session,
@ -333,7 +321,6 @@ async def _run_full_scan(mocks, *, max_files=500, include_subfolders=False):
mocks["log_entry"],
max_files,
include_subfolders=include_subfolders,
enable_summary=True,
)
@ -487,12 +474,6 @@ async def test_delta_sync_removals_serial_rest_parallel(monkeypatch):
"IndexingPipelineService",
MagicMock(return_value=pipeline_mock),
)
monkeypatch.setattr(
_mod,
"get_user_long_context_llm",
AsyncMock(return_value=MagicMock()),
)
mock_session, _ = _make_page_limit_session()
mock_task_logger = MagicMock()
mock_task_logger.log_task_progress = AsyncMock()
@ -509,7 +490,6 @@ async def test_delta_sync_removals_serial_rest_parallel(monkeypatch):
mock_task_logger,
MagicMock(),
max_files=500,
enable_summary=True,
)
assert sorted(remove_calls) == ["del1", "del2", "trash1"]
@ -577,7 +557,6 @@ async def _run_selected(mocks, file_ids):
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)

View file

@ -70,7 +70,6 @@ async def test_build_connector_doc_produces_correct_fields():
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert doc.title == "ENG-42: Fix login bug"
@ -80,7 +79,6 @@ async def test_build_connector_doc_produces_correct_fields():
assert doc.search_space_id == _SEARCH_SPACE_ID
assert doc.connector_id == _CONNECTOR_ID
assert doc.created_by_id == _USER_ID
assert doc.should_summarize is True
assert doc.metadata["issue_id"] == "abc-123"
assert doc.metadata["issue_identifier"] == "ENG-42"
assert doc.metadata["issue_title"] == "Fix login bug"
@ -90,24 +88,6 @@ async def test_build_connector_doc_produces_correct_fields():
assert doc.metadata["connector_id"] == _CONNECTOR_ID
assert doc.metadata["document_type"] == "Linear Issue"
assert doc.metadata["connector_type"] == "Linear"
assert doc.fallback_summary is not None
assert "ENG-42" in doc.fallback_summary
assert markdown in doc.fallback_summary
async def test_build_connector_doc_summary_disabled():
"""When enable_summary is False, should_summarize is False."""
doc = _build_connector_doc(
_make_issue(),
_make_formatted_issue(),
"# content",
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=False,
)
assert doc.should_summarize is False
# ---------------------------------------------------------------------------
@ -115,10 +95,9 @@ async def test_build_connector_doc_summary_disabled():
# ---------------------------------------------------------------------------
def _mock_connector(enable_summary: bool = True):
def _mock_connector():
c = MagicMock()
c.config = {"access_token": "tok"}
c.enable_summary = enable_summary
c.last_indexed_at = None
return c

View file

@ -41,7 +41,6 @@ async def test_build_connector_doc_produces_correct_fields():
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert doc.title == "My Notion Page"
@ -51,29 +50,11 @@ async def test_build_connector_doc_produces_correct_fields():
assert doc.search_space_id == _SEARCH_SPACE_ID
assert doc.connector_id == _CONNECTOR_ID
assert doc.created_by_id == _USER_ID
assert doc.should_summarize is True
assert doc.metadata["page_title"] == "My Notion Page"
assert doc.metadata["page_id"] == "abc-123"
assert doc.metadata["connector_id"] == _CONNECTOR_ID
assert doc.metadata["document_type"] == "Notion Page"
assert doc.metadata["connector_type"] == "Notion"
assert doc.fallback_summary is not None
assert "My Notion Page" in doc.fallback_summary
assert markdown in doc.fallback_summary
async def test_build_connector_doc_summary_disabled():
"""When enable_summary is False, should_summarize is False."""
doc = _build_connector_doc(
_make_page(),
"# content",
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=False,
)
assert doc.should_summarize is False
# ---------------------------------------------------------------------------
@ -81,10 +62,9 @@ async def test_build_connector_doc_summary_disabled():
# ---------------------------------------------------------------------------
def _mock_connector(enable_summary: bool = True):
def _mock_connector():
c = MagicMock()
c.config = {"access_token": "tok"}
c.enable_summary = enable_summary
c.last_indexed_at = None
return c

View file

@ -65,7 +65,6 @@ async def test_single_file_returns_one_connector_document(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 1
@ -91,7 +90,6 @@ async def test_multiple_files_all_produce_documents(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 3
@ -119,7 +117,6 @@ async def test_one_download_exception_does_not_block_others(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 2
@ -146,7 +143,6 @@ async def test_etl_error_counts_as_download_failure(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
assert len(docs) == 1
@ -185,7 +181,6 @@ async def test_concurrency_bounded_by_semaphore(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
max_concurrency=2,
)
@ -225,7 +220,6 @@ async def test_heartbeat_fires_during_parallel_downloads(
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
on_heartbeat=_on_heartbeat,
)

View file

@ -180,7 +180,6 @@ async def _run_gdrive_selected(mocks, file_ids):
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
@ -336,10 +335,6 @@ def gdrive_full_scan_mocks(monkeypatch):
monkeypatch.setattr(
_mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
)
monkeypatch.setattr(
_mod, "get_user_long_context_llm", AsyncMock(return_value=MagicMock())
)
return {
"mod": _mod,
"session": session,
@ -366,7 +361,6 @@ async def _run_gdrive_full_scan(mocks, max_files=500):
MagicMock(),
max_files,
include_subfolders=False,
enable_summary=True,
)
@ -454,10 +448,6 @@ async def test_gdrive_delta_sync_skips_over_quota(monkeypatch):
monkeypatch.setattr(
_mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
)
monkeypatch.setattr(
_mod, "get_user_long_context_llm", AsyncMock(return_value=MagicMock())
)
mock_task_logger = MagicMock()
mock_task_logger.log_task_progress = AsyncMock()
@ -473,7 +463,6 @@ async def test_gdrive_delta_sync_skips_over_quota(monkeypatch):
mock_task_logger,
MagicMock(),
max_files=500,
enable_summary=True,
)
call_files = download_mock.call_args[0][1]
@ -539,7 +528,6 @@ async def _run_onedrive_selected(mocks, file_ids):
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)
@ -641,7 +629,6 @@ async def _run_dropbox_selected(mocks, file_paths):
connector_id=_CONNECTOR_ID,
search_space_id=_SEARCH_SPACE_ID,
user_id=_USER_ID,
enable_summary=True,
)

View file

@ -92,6 +92,13 @@ def _signed_slack_request(payload: dict, *, secret: str = "signing-secret") -> R
)
def _enable_slack_gateway(monkeypatch):
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_ENABLED", True)
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_CLIENT_ID", "client-id")
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_CLIENT_SECRET", "client-secret")
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
async def _call_webhook(*, request: RequestStub, account_id: int, session):
return await routes.telegram_webhook(
request=request,
@ -230,7 +237,7 @@ def test_verify_slack_signature_accepts_valid_signature():
@pytest.mark.asyncio
async def test_slack_webhook_url_verification(monkeypatch, mocker):
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
_enable_slack_gateway(monkeypatch)
request = _signed_slack_request({"type": "url_verification", "challenge": "abc123"})
response = await routes.slack_webhook(request=request, session=mocker.AsyncMock())
@ -241,7 +248,7 @@ async def test_slack_webhook_url_verification(monkeypatch, mocker):
@pytest.mark.asyncio
async def test_slack_webhook_persists_event(monkeypatch, mocker):
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
_enable_slack_gateway(monkeypatch)
session = mocker.AsyncMock()
monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account()))
persist = mocker.AsyncMock(return_value=100)
@ -271,7 +278,7 @@ async def test_slack_webhook_persists_event(monkeypatch, mocker):
@pytest.mark.asyncio
async def test_slack_webhook_ignores_self_event(monkeypatch, mocker):
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
_enable_slack_gateway(monkeypatch)
session = mocker.AsyncMock()
monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account()))
persist = mocker.AsyncMock(return_value=100)

View file

@ -18,7 +18,6 @@ def test_valid_document_created_with_required_fields():
connector_id=42,
created_by_id="00000000-0000-0000-0000-000000000001",
)
assert doc.should_summarize is True
assert doc.should_use_code_chunker is False
assert doc.metadata == {}
assert doc.connector_id == 42

View file

@ -1,41 +0,0 @@
from unittest.mock import MagicMock
import pytest
from app.indexing_pipeline.document_summarizer import summarize_document
pytestmark = pytest.mark.unit
@pytest.mark.usefixtures("patched_summarizer_chain")
async def test_without_metadata_returns_raw_summary():
"""Summarizer returns the LLM output directly when no metadata is provided."""
result = await summarize_document("# Content", llm=MagicMock(model="gpt-4"))
assert result == "The summary."
@pytest.mark.usefixtures("patched_summarizer_chain")
async def test_with_metadata_includes_metadata_values_in_output():
"""Non-empty metadata values are prepended to the summary output."""
result = await summarize_document(
"# Content",
llm=MagicMock(model="gpt-4"),
metadata={"author": "Alice", "source": "Notion"},
)
assert "Alice" in result
assert "Notion" in result
@pytest.mark.usefixtures("patched_summarizer_chain")
async def test_with_metadata_omits_empty_fields_from_output():
"""Empty metadata fields are omitted from the summary output."""
result = await summarize_document(
"# Content",
llm=MagicMock(model="gpt-4"),
metadata={"author": "Alice", "description": ""},
)
assert "Alice" in result
assert "description" not in result.lower()

View file

@ -37,12 +37,10 @@ async def test_calls_prepare_then_index_per_document(pipeline, make_connector_do
orm2 = MagicMock(spec=Document)
orm2.unique_identifier_hash = compute_unique_identifier_hash(doc2)
mock_llm = MagicMock()
pipeline.prepare_for_indexing = AsyncMock(return_value=[orm1, orm2])
pipeline.index = AsyncMock(side_effect=lambda doc, cdoc, llm: doc)
pipeline.index = AsyncMock(side_effect=lambda doc, cdoc: doc)
results = await pipeline.index_batch([doc1, doc2], mock_llm)
results = await pipeline.index_batch([doc1, doc2])
pipeline.prepare_for_indexing.assert_awaited_once_with([doc1, doc2])
assert pipeline.index.await_count == 2
@ -53,7 +51,7 @@ async def test_empty_input_returns_empty(pipeline):
"""Empty connector_docs list returns empty result."""
pipeline.prepare_for_indexing = AsyncMock(return_value=[])
results = await pipeline.index_batch([], MagicMock())
results = await pipeline.index_batch([])
assert results == []
@ -74,7 +72,7 @@ async def test_skips_document_without_matching_connector_doc(
pipeline.prepare_for_indexing = AsyncMock(return_value=[orphan_orm])
pipeline.index = AsyncMock()
results = await pipeline.index_batch([doc1], MagicMock())
results = await pipeline.index_batch([doc1])
pipeline.index.assert_not_awaited()
assert results == []

View file

@ -51,11 +51,6 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
return await original_to_thread(func, *args, **kwargs)
monkeypatch.setattr(asyncio, "to_thread", tracking_to_thread)
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
AsyncMock(return_value="Summary."),
)
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
monkeypatch.setattr(
@ -85,7 +80,7 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
document.id = 1
document.status = DocumentStatus.pending()
await pipeline.index(document, connector_doc, llm=MagicMock())
await pipeline.index(document, connector_doc)
# Either chunker entry point satisfies the "chunking runs off the event
# loop" contract this test guards. Routing between the two is verified
@ -104,10 +99,6 @@ async def test_non_code_documents_use_hybrid_chunker(
mid-row. Only documents flagged with ``should_use_code_chunker=True``
should take the ``chunk_text`` path.
"""
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
AsyncMock(return_value="Summary."),
)
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
monkeypatch.setattr(
@ -139,7 +130,7 @@ async def test_non_code_documents_use_hybrid_chunker(
document.id = 1
document.status = DocumentStatus.pending()
await pipeline.index(document, connector_doc, llm=MagicMock())
await pipeline.index(document, connector_doc)
mock_chunk_hybrid.assert_called_once()
mock_chunk_code.assert_not_called()
@ -192,19 +183,14 @@ async def test_batch_parallel_indexes_all_documents(
index_calls = []
async def fake_index(self, document, connector_doc, llm):
async def fake_index(self, document, connector_doc):
index_calls.append(document.id)
document.status = DocumentStatus.ready()
return document
monkeypatch.setattr(IndexingPipelineService, "index", fake_index)
async def mock_get_llm(session):
return MagicMock()
_, indexed, failed = await pipeline.index_batch_parallel(
docs, mock_get_llm, max_concurrency=2
)
_, indexed, failed = await pipeline.index_batch_parallel(docs, max_concurrency=2)
assert indexed == 3
assert failed == 0
@ -233,20 +219,15 @@ async def test_batch_parallel_one_failure_does_not_affect_others(
_mock_session_factory(orm_by_id),
)
async def failing_index(self, document, connector_doc, llm):
async def failing_index(self, document, connector_doc):
if document.id == 2:
raise RuntimeError("LLM exploded")
raise RuntimeError("Indexing exploded")
document.status = DocumentStatus.ready()
return document
monkeypatch.setattr(IndexingPipelineService, "index", failing_index)
async def mock_get_llm(session):
return MagicMock()
_, indexed, failed = await pipeline.index_batch_parallel(
docs, mock_get_llm, max_concurrency=4
)
_, indexed, failed = await pipeline.index_batch_parallel(docs, max_concurrency=4)
assert indexed == 2
assert failed == 1