mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 17:26:23 +02:00
Merge pull request #865 from CREDO23/sur-182-fix-ux-experience-for-composio-google-drive-connector
[Perf] Batch embedding, non-blocking search, chunks index & Google Drive UX fix
This commit is contained in:
commit
547077e5b9
17 changed files with 183 additions and 108 deletions
|
|
@ -129,10 +129,12 @@ def patched_summarize_raises(monkeypatch) -> AsyncMock:
|
|||
|
||||
|
||||
@pytest.fixture
|
||||
def patched_embed_text(monkeypatch) -> MagicMock:
|
||||
mock = MagicMock(return_value=[0.1] * _EMBEDDING_DIM)
|
||||
def patched_embed_texts(monkeypatch) -> MagicMock:
|
||||
mock = MagicMock(
|
||||
side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.embed_text",
|
||||
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
|
||||
mock,
|
||||
)
|
||||
return mock
|
||||
|
|
|
|||
|
|
@ -265,8 +265,8 @@ def _mock_external_apis(monkeypatch):
|
|||
AsyncMock(return_value="Mocked summary."),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.embed_text",
|
||||
MagicMock(return_value=[0.1] * _EMBEDDING_DIM),
|
||||
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
|
||||
MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ pytestmark = pytest.mark.integration
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
|
||||
"""Document status is READY after successful indexing."""
|
||||
|
|
@ -31,7 +31,7 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
|
||||
"""Document content is set to the LLM-generated summary."""
|
||||
|
|
@ -55,7 +55,7 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker):
|
||||
"""Chunks derived from the source markdown are persisted in the DB."""
|
||||
|
|
@ -84,7 +84,7 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize_raises", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker):
|
||||
"""RuntimeError is raised when the indexing step fails so the caller can fire a failure notification."""
|
||||
|
|
@ -107,7 +107,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker):
|
||||
"""Document content is updated to the new summary after reindexing."""
|
||||
|
|
@ -136,7 +136,7 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_updates_content_hash(
|
||||
db_session, db_search_space, db_user, mocker
|
||||
|
|
@ -168,7 +168,7 @@ async def test_reindex_updates_content_hash(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, mocker):
|
||||
"""Document status is READY after successful reindexing."""
|
||||
|
|
@ -196,7 +196,7 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
|
|||
assert DocumentStatus.is_state(document.status, DocumentStatus.READY)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("patched_summarize", "patched_embed_text")
|
||||
@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts")
|
||||
async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker):
|
||||
"""Reindexing replaces old chunks with new content rather than appending."""
|
||||
mocker.patch(
|
||||
|
|
@ -235,7 +235,7 @@ async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, moc
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_clears_reindexing_flag(
|
||||
db_session, db_search_space, db_user, mocker
|
||||
|
|
@ -266,7 +266,7 @@ async def test_reindex_clears_reindexing_flag(
|
|||
assert document.content_needs_reindexing is False
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("patched_embed_text", "patched_chunk_text")
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, mocker):
|
||||
"""RuntimeError is raised when reindexing fails so the caller can handle it."""
|
||||
mocker.patch(
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ pytestmark = pytest.mark.integration
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_sets_status_ready(
|
||||
db_session,
|
||||
|
|
@ -38,7 +38,7 @@ async def test_sets_status_ready(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_content_is_summary_when_should_summarize_true(
|
||||
db_session,
|
||||
|
|
@ -65,7 +65,7 @@ async def test_content_is_summary_when_should_summarize_true(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_content_is_source_markdown_when_should_summarize_false(
|
||||
db_session,
|
||||
|
|
@ -95,7 +95,7 @@ async def test_content_is_source_markdown_when_should_summarize_false(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_chunks_written_to_db(
|
||||
db_session,
|
||||
|
|
@ -123,7 +123,7 @@ async def test_chunks_written_to_db(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_embedding_written_to_db(
|
||||
db_session,
|
||||
|
|
@ -151,7 +151,7 @@ async def test_embedding_written_to_db(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_updated_at_advances_after_indexing(
|
||||
db_session,
|
||||
|
|
@ -183,7 +183,7 @@ async def test_updated_at_advances_after_indexing(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_no_llm_falls_back_to_source_markdown(
|
||||
db_session,
|
||||
|
|
@ -214,7 +214,7 @@ async def test_no_llm_falls_back_to_source_markdown(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_fallback_summary_used_when_llm_unavailable(
|
||||
db_session,
|
||||
|
|
@ -245,7 +245,7 @@ async def test_fallback_summary_used_when_llm_unavailable(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_replaces_old_chunks(
|
||||
db_session,
|
||||
|
|
@ -282,7 +282,7 @@ async def test_reindex_replaces_old_chunks(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize_raises", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_llm_error_sets_status_failed(
|
||||
db_session,
|
||||
|
|
@ -309,7 +309,7 @@ async def test_llm_error_sets_status_failed(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize_raises", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_llm_error_leaves_no_partial_data(
|
||||
db_session,
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ async def test_new_document_is_persisted_with_pending_status(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_unchanged_ready_document_is_skipped(
|
||||
db_session,
|
||||
|
|
@ -56,7 +56,7 @@ async def test_unchanged_ready_document_is_skipped(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_title_only_change_updates_title_in_db(
|
||||
db_session,
|
||||
|
|
@ -339,7 +339,7 @@ async def test_same_content_from_different_source_is_skipped(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize_raises", "patched_embed_text", "patched_chunk_text"
|
||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_failed_document_with_unchanged_content_is_requeued(
|
||||
db_session,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue