mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
Merge upstream/dev
This commit is contained in:
commit
8bdfd00a15
191 changed files with 3301 additions and 4079 deletions
|
|
@ -0,0 +1,279 @@
|
|||
"""Integration tests for new-chat thread visibility invariants.
|
||||
|
||||
These tests exercise the route handlers directly with real DB-backed
|
||||
users, memberships, and permissions. The important contract is that a
|
||||
thread shared with a search space stays shared across normal metadata
|
||||
updates until the creator explicitly makes it private again.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import (
|
||||
ChatVisibility,
|
||||
SearchSpace,
|
||||
SearchSpaceMembership,
|
||||
SearchSpaceRole,
|
||||
User,
|
||||
)
|
||||
from app.routes import new_chat_routes
|
||||
from app.schemas.new_chat import (
|
||||
NewChatThreadCreate,
|
||||
NewChatThreadUpdate,
|
||||
NewChatThreadVisibilityUpdate,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def db_member(db_session: AsyncSession, db_search_space: SearchSpace) -> User:
    """Provide a second user who is a non-owner member of ``db_search_space``.

    The user row is flushed first, then a membership is added that links the
    user to the search space through the role named "Editor".
    """
    second_user = User(
        id=uuid.uuid4(),
        email="member@surfsense.net",
        hashed_password="hashed",
        is_active=True,
        is_superuser=False,
        is_verified=True,
    )
    db_session.add(second_user)
    await db_session.flush()

    # Look up the "Editor" role that belongs to this search space.
    editor_role_query = select(SearchSpaceRole).where(
        SearchSpaceRole.search_space_id == db_search_space.id,
        SearchSpaceRole.name == "Editor",
    )
    role_rows = await db_session.execute(editor_role_query)
    # .one() fails loudly unless exactly one matching role row exists.
    editor_role = role_rows.scalars().one()

    membership = SearchSpaceMembership(
        user_id=second_user.id,
        search_space_id=db_search_space.id,
        role_id=editor_role.id,
        is_owner=False,
    )
    db_session.add(membership)
    await db_session.flush()
    return second_user
|
||||
|
||||
|
||||
async def _create_thread(
    db_session: AsyncSession,
    db_user: User,
    db_search_space: SearchSpace,
    *,
    title: str = "Visibility Invariant Chat",
):
    """Create a private, non-archived thread through the ``create_thread`` route.

    The thread is created in ``db_search_space`` on behalf of ``db_user`` and
    the route handler's return value is passed back unchanged.
    """
    payload = NewChatThreadCreate(
        title=title,
        archived=False,
        search_space_id=db_search_space.id,
        visibility=ChatVisibility.PRIVATE,
    )
    created = await new_chat_routes.create_thread(
        payload,
        session=db_session,
        user=db_user,
    )
    return created
|
||||
|
||||
|
||||
def _active_thread_ids(response) -> set[int]:
    """Collect the ``id`` of every entry in ``response.threads`` into a set."""
    collected = set()
    for entry in response.threads:
        collected.add(entry.id)
    return collected
|
||||
|
||||
|
||||
def _search_thread_ids(response) -> set[int]:
    """Collect the ``id`` of every item yielded by iterating ``response``."""
    collected = set()
    for entry in response:
        collected.add(entry.id)
    return collected
|
||||
|
||||
|
||||
async def test_private_thread_is_hidden_from_other_search_space_member(
    db_session: AsyncSession,
    db_user: User,
    db_member: User,
    db_search_space: SearchSpace,
):
    """A private thread stays out of another member's list and search results.

    Fetching the full thread as that member must raise an HTTP 403.
    """
    thread = await _create_thread(db_session, db_user, db_search_space)
    # Every call below is made on behalf of the non-creator member.
    as_member = {"session": db_session, "user": db_member}

    listed = await new_chat_routes.list_threads(
        search_space_id=db_search_space.id,
        **as_member,
    )
    found = await new_chat_routes.search_threads(
        search_space_id=db_search_space.id,
        title="Visibility",
        **as_member,
    )

    assert thread.id not in _active_thread_ids(listed)
    assert thread.id not in _search_thread_ids(found)

    with pytest.raises(HTTPException) as denied:
        await new_chat_routes.get_thread_full(thread_id=thread.id, **as_member)
    # Checked after the block: code following the raising call would not run.
    assert denied.value.status_code == 403
|
||||
|
||||
|
||||
async def test_creator_can_share_thread_and_member_can_list_search_read_it(
    db_session: AsyncSession,
    db_user: User,
    db_member: User,
    db_search_space: SearchSpace,
):
    """After the creator shares a thread, another member can list, search and read it."""
    thread = await _create_thread(db_session, db_user, db_search_space)
    as_member = {"session": db_session, "user": db_member}

    share_result = await new_chat_routes.update_thread_visibility(
        thread_id=thread.id,
        visibility_update=NewChatThreadVisibilityUpdate(
            visibility=ChatVisibility.SEARCH_SPACE,
        ),
        session=db_session,
        user=db_user,
    )

    listed = await new_chat_routes.list_threads(
        search_space_id=db_search_space.id,
        **as_member,
    )
    found = await new_chat_routes.search_threads(
        search_space_id=db_search_space.id,
        title="Visibility",
        **as_member,
    )
    detail = await new_chat_routes.get_thread_full(
        thread_id=thread.id,
        **as_member,
    )

    assert share_result.visibility == ChatVisibility.SEARCH_SPACE
    assert thread.id in _active_thread_ids(listed)
    assert thread.id in _search_thread_ids(found)
    # The full-thread handler result is indexed by key here, unlike the others.
    assert detail["id"] == thread.id
    assert detail["visibility"] == ChatVisibility.SEARCH_SPACE
|
||||
|
||||
|
||||
async def test_rename_and_archive_do_not_reset_shared_visibility(
    db_session: AsyncSession,
    db_user: User,
    db_search_space: SearchSpace,
):
    """Renaming or archiving a shared thread leaves its visibility at SEARCH_SPACE."""
    thread = await _create_thread(db_session, db_user, db_search_space)
    # All updates in this test are issued by the thread's creator.
    as_creator = {"session": db_session, "user": db_user}

    await new_chat_routes.update_thread_visibility(
        thread_id=thread.id,
        visibility_update=NewChatThreadVisibilityUpdate(
            visibility=ChatVisibility.SEARCH_SPACE,
        ),
        **as_creator,
    )

    after_rename = await new_chat_routes.update_thread(
        thread_id=thread.id,
        thread_update=NewChatThreadUpdate(title="Renamed Shared Chat"),
        **as_creator,
    )
    after_archive = await new_chat_routes.update_thread(
        thread_id=thread.id,
        thread_update=NewChatThreadUpdate(archived=True),
        **as_creator,
    )

    assert after_rename.visibility == ChatVisibility.SEARCH_SPACE
    assert after_archive.visibility == ChatVisibility.SEARCH_SPACE
    assert after_archive.archived is True
|
||||
|
||||
|
||||
async def test_non_creator_cannot_change_shared_thread_back_to_private(
    db_session: AsyncSession,
    db_user: User,
    db_member: User,
    db_search_space: SearchSpace,
):
    """A member who did not create a shared thread gets HTTP 403 when making it private."""
    thread = await _create_thread(db_session, db_user, db_search_space)

    # The creator shares the thread first.
    await new_chat_routes.update_thread_visibility(
        thread_id=thread.id,
        visibility_update=NewChatThreadVisibilityUpdate(
            visibility=ChatVisibility.SEARCH_SPACE,
        ),
        session=db_session,
        user=db_user,
    )

    # The other member then attempts to revert it.
    with pytest.raises(HTTPException) as denied:
        await new_chat_routes.update_thread_visibility(
            thread_id=thread.id,
            visibility_update=NewChatThreadVisibilityUpdate(
                visibility=ChatVisibility.PRIVATE,
            ),
            session=db_session,
            user=db_member,
        )

    assert denied.value.status_code == 403
|
||||
|
||||
|
||||
async def test_creator_can_make_shared_thread_private_again(
    db_session: AsyncSession,
    db_user: User,
    db_member: User,
    db_search_space: SearchSpace,
):
    """The creator can revert a shared thread to private, hiding it from members again.

    After the revert, the other member no longer sees the thread in list or
    search results, and fetching it in full raises an HTTP 403.
    """
    thread = await _create_thread(db_session, db_user, db_search_space)
    as_creator = {"session": db_session, "user": db_user}
    as_member = {"session": db_session, "user": db_member}

    await new_chat_routes.update_thread_visibility(
        thread_id=thread.id,
        visibility_update=NewChatThreadVisibilityUpdate(
            visibility=ChatVisibility.SEARCH_SPACE,
        ),
        **as_creator,
    )

    reverted = await new_chat_routes.update_thread_visibility(
        thread_id=thread.id,
        visibility_update=NewChatThreadVisibilityUpdate(
            visibility=ChatVisibility.PRIVATE,
        ),
        **as_creator,
    )
    listed = await new_chat_routes.list_threads(
        search_space_id=db_search_space.id,
        **as_member,
    )
    found = await new_chat_routes.search_threads(
        search_space_id=db_search_space.id,
        title="Visibility",
        **as_member,
    )

    assert reverted.visibility == ChatVisibility.PRIVATE
    assert thread.id not in _active_thread_ids(listed)
    assert thread.id not in _search_thread_ids(found)

    with pytest.raises(HTTPException) as denied:
        await new_chat_routes.get_thread_full(thread_id=thread.id, **as_member)
    # Checked after the block: code following the raising call would not run.
    assert denied.value.status_code == 403
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
import importlib
|
||||
import sys
|
||||
import uuid
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
|
@ -123,26 +123,6 @@ async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpac
|
|||
return space
|
||||
|
||||
|
||||
@pytest.fixture
def patched_summarize(monkeypatch) -> AsyncMock:
    """Swap ``summarize_document`` in the indexing pipeline service for an async mock.

    The mock resolves to the fixed string "Mocked summary." and is returned
    so the caller can inspect it.
    """
    target = "app.indexing_pipeline.indexing_pipeline_service.summarize_document"
    summary_mock = AsyncMock(return_value="Mocked summary.")
    monkeypatch.setattr(target, summary_mock)
    return summary_mock
|
||||
|
||||
|
||||
@pytest.fixture
def patched_summarize_raises(monkeypatch) -> AsyncMock:
    """Swap ``summarize_document`` for an async mock that raises ``RuntimeError``.

    The raised error carries the message "LLM unavailable"; the mock is
    returned so the caller can inspect it.
    """
    target = "app.indexing_pipeline.indexing_pipeline_service.summarize_document"
    failing_summary = AsyncMock(side_effect=RuntimeError("LLM unavailable"))
    monkeypatch.setattr(target, failing_summary)
    return failing_summary
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def patched_embed_texts(monkeypatch) -> MagicMock:
|
||||
mock = MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts])
|
||||
|
|
@ -153,6 +133,16 @@ def patched_embed_texts(monkeypatch) -> MagicMock:
|
|||
return mock
|
||||
|
||||
|
||||
@pytest.fixture
def patched_embed_texts_raises(monkeypatch) -> MagicMock:
    """Swap ``embed_texts`` in the indexing pipeline service for a mock that raises.

    Calling the mock raises ``RuntimeError("Embedding unavailable")``; the
    mock is returned so the caller can inspect it.
    """
    target = "app.indexing_pipeline.indexing_pipeline_service.embed_texts"
    failing_embed = MagicMock(side_effect=RuntimeError("Embedding unavailable"))
    monkeypatch.setattr(target, failing_embed)
    return failing_embed
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def patched_chunk_text(monkeypatch) -> MagicMock:
|
||||
mock = MagicMock(return_value=["Test chunk content."])
|
||||
|
|
|
|||
|
|
@ -68,7 +68,6 @@ class InlineTaskDispatcher:
|
|||
filename: str,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
should_summarize: bool = False,
|
||||
use_vision_llm: bool = False,
|
||||
processing_mode: str = "basic",
|
||||
) -> None:
|
||||
|
|
@ -83,7 +82,6 @@ class InlineTaskDispatcher:
|
|||
filename,
|
||||
search_space_id,
|
||||
user_id,
|
||||
should_summarize=should_summarize,
|
||||
use_vision_llm=use_vision_llm,
|
||||
processing_mode=processing_mode,
|
||||
)
|
||||
|
|
@ -266,10 +264,6 @@ async def page_limits():
|
|||
@pytest.fixture(autouse=True)
|
||||
def _mock_external_apis(monkeypatch):
|
||||
"""Mock LLM, embedding, and chunking — these are external API boundaries."""
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
||||
AsyncMock(return_value="Mocked summary."),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
|
||||
MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]),
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ pytestmark = pytest.mark.integration
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
|
||||
"""Document status is READY after successful indexing."""
|
||||
|
|
@ -19,7 +19,6 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
|
|
@ -31,10 +30,10 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
|
||||
"""Document content is set to the LLM-generated summary."""
|
||||
async def test_content_is_source_markdown(db_session, db_search_space, db_user, mocker):
|
||||
"""Document content is set to the extracted source markdown."""
|
||||
adapter = UploadDocumentAdapter(db_session)
|
||||
await adapter.index(
|
||||
markdown_content="## Hello\n\nSome content.",
|
||||
|
|
@ -42,8 +41,6 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
should_summarize=True,
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
|
|
@ -51,11 +48,11 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
|
|||
)
|
||||
document = result.scalars().first()
|
||||
|
||||
assert document.content == "Mocked summary."
|
||||
assert document.content == "## Hello\n\nSome content."
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker):
|
||||
"""Chunks derived from the source markdown are persisted in the DB."""
|
||||
|
|
@ -66,7 +63,6 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
|
|
@ -83,9 +79,7 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker
|
|||
assert chunks[0].content == "Test chunk content."
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
|
||||
async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker):
|
||||
"""RuntimeError is raised when the indexing step fails so the caller can fire a failure notification."""
|
||||
adapter = UploadDocumentAdapter(db_session)
|
||||
|
|
@ -96,8 +90,6 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
should_summarize=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -107,10 +99,10 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker):
|
||||
"""Document content is updated to the new summary after reindexing."""
|
||||
"""Document content is updated to the new source markdown after reindexing."""
|
||||
adapter = UploadDocumentAdapter(db_session)
|
||||
await adapter.index(
|
||||
markdown_content="## Original\n\nOriginal content.",
|
||||
|
|
@ -118,7 +110,6 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
|
|
@ -129,14 +120,14 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc
|
|||
document.source_markdown = "## Edited\n\nNew content after user edit."
|
||||
await db_session.flush()
|
||||
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
await adapter.reindex(document=document)
|
||||
|
||||
await db_session.refresh(document)
|
||||
assert document.content == "Mocked summary."
|
||||
assert document.content == "## Edited\n\nNew content after user edit."
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_updates_content_hash(
|
||||
db_session, db_search_space, db_user, mocker
|
||||
|
|
@ -149,7 +140,6 @@ async def test_reindex_updates_content_hash(
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
|
|
@ -161,14 +151,14 @@ async def test_reindex_updates_content_hash(
|
|||
document.source_markdown = "## Edited\n\nNew content after user edit."
|
||||
await db_session.flush()
|
||||
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
await adapter.reindex(document=document)
|
||||
|
||||
await db_session.refresh(document)
|
||||
assert document.content_hash != original_hash
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, mocker):
|
||||
"""Document status is READY after successful reindexing."""
|
||||
|
|
@ -179,7 +169,6 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
|
|
@ -190,13 +179,13 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
|
|||
document.source_markdown = "## Edited\n\nNew content after user edit."
|
||||
await db_session.flush()
|
||||
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
await adapter.reindex(document=document)
|
||||
|
||||
await db_session.refresh(document)
|
||||
assert DocumentStatus.is_state(document.status, DocumentStatus.READY)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts")
|
||||
@pytest.mark.usefixtures("patched_embed_texts")
|
||||
async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker):
|
||||
"""Reindexing replaces old chunks with new content rather than appending."""
|
||||
mocker.patch(
|
||||
|
|
@ -211,7 +200,6 @@ async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, moc
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
|
|
@ -223,7 +211,7 @@ async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, moc
|
|||
document.source_markdown = "## Edited\n\nNew content after user edit."
|
||||
await db_session.flush()
|
||||
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
await adapter.reindex(document=document)
|
||||
|
||||
chunks_result = await db_session.execute(
|
||||
select(Chunk).filter(Chunk.document_id == document_id)
|
||||
|
|
@ -235,7 +223,7 @@ async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, moc
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_clears_reindexing_flag(
|
||||
db_session, db_search_space, db_user, mocker
|
||||
|
|
@ -248,7 +236,6 @@ async def test_reindex_clears_reindexing_flag(
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
|
|
@ -260,19 +247,17 @@ async def test_reindex_clears_reindexing_flag(
|
|||
document.content_needs_reindexing = True
|
||||
await db_session.flush()
|
||||
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
await adapter.reindex(document=document)
|
||||
|
||||
await db_session.refresh(document)
|
||||
assert document.content_needs_reindexing is False
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, mocker):
|
||||
async def test_reindex_raises_on_failure(
|
||||
db_session, db_search_space, db_user, patched_embed_texts, mocker
|
||||
):
|
||||
"""RuntimeError is raised when reindexing fails so the caller can handle it."""
|
||||
mocker.patch(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
||||
return_value="Mocked summary.",
|
||||
)
|
||||
|
||||
adapter = UploadDocumentAdapter(db_session)
|
||||
await adapter.index(
|
||||
|
|
@ -281,7 +266,6 @@ async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, m
|
|||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
|
|
@ -292,13 +276,10 @@ async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, m
|
|||
document.source_markdown = "## Edited\n\nNew content after user edit."
|
||||
await db_session.flush()
|
||||
|
||||
mocker.patch(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
||||
side_effect=RuntimeError("LLM unavailable"),
|
||||
)
|
||||
patched_embed_texts.side_effect = RuntimeError("Embedding unavailable")
|
||||
|
||||
with pytest.raises(RuntimeError, match=r"Embedding failed|Reindexing failed"):
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
await adapter.reindex(document=document)
|
||||
|
||||
|
||||
async def test_reindex_raises_on_empty_source_markdown(
|
||||
|
|
@ -323,4 +304,4 @@ async def test_reindex_raises_on_empty_source_markdown(
|
|||
adapter = UploadDocumentAdapter(db_session)
|
||||
|
||||
with pytest.raises(RuntimeError, match="no source_markdown"):
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
await adapter.reindex(document=document)
|
||||
|
|
|
|||
|
|
@ -25,8 +25,6 @@ def _cal_doc(
|
|||
search_space_id=search_space_id,
|
||||
connector_id=connector_id,
|
||||
created_by_id=user_id,
|
||||
should_summarize=True,
|
||||
fallback_summary=f"Calendar: Event {unique_id}",
|
||||
metadata={
|
||||
"event_id": unique_id,
|
||||
"start_time": "2025-01-15T10:00:00",
|
||||
|
|
@ -37,7 +35,7 @@ def _cal_doc(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_calendar_pipeline_creates_ready_document(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
@ -55,7 +53,7 @@ async def test_calendar_pipeline_creates_ready_document(
|
|||
prepared = await service.prepare_for_indexing([doc])
|
||||
assert len(prepared) == 1
|
||||
|
||||
await service.index(prepared[0], doc, llm=mocker.Mock())
|
||||
await service.index(prepared[0], doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.search_space_id == space_id)
|
||||
|
|
@ -68,7 +66,7 @@ async def test_calendar_pipeline_creates_ready_document(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_calendar_legacy_doc_migrated(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
|
|||
|
|
@ -25,8 +25,6 @@ def _drive_doc(
|
|||
search_space_id=search_space_id,
|
||||
connector_id=connector_id,
|
||||
created_by_id=user_id,
|
||||
should_summarize=True,
|
||||
fallback_summary=f"File: {unique_id}.pdf",
|
||||
metadata={
|
||||
"google_drive_file_id": unique_id,
|
||||
"google_drive_file_name": f"{unique_id}.pdf",
|
||||
|
|
@ -36,7 +34,7 @@ def _drive_doc(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_drive_pipeline_creates_ready_document(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
@ -54,7 +52,7 @@ async def test_drive_pipeline_creates_ready_document(
|
|||
prepared = await service.prepare_for_indexing([doc])
|
||||
assert len(prepared) == 1
|
||||
|
||||
await service.index(prepared[0], doc, llm=mocker.Mock())
|
||||
await service.index(prepared[0], doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.search_space_id == space_id)
|
||||
|
|
@ -67,7 +65,7 @@ async def test_drive_pipeline_creates_ready_document(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_drive_legacy_doc_migrated(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
|
|||
|
|
@ -24,8 +24,6 @@ def _dropbox_doc(
|
|||
search_space_id=search_space_id,
|
||||
connector_id=connector_id,
|
||||
created_by_id=user_id,
|
||||
should_summarize=True,
|
||||
fallback_summary=f"File: {unique_id}.docx",
|
||||
metadata={
|
||||
"dropbox_file_id": unique_id,
|
||||
"dropbox_file_name": f"{unique_id}.docx",
|
||||
|
|
@ -35,7 +33,7 @@ def _dropbox_doc(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_dropbox_pipeline_creates_ready_document(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
@ -53,7 +51,7 @@ async def test_dropbox_pipeline_creates_ready_document(
|
|||
prepared = await service.prepare_for_indexing([doc])
|
||||
assert len(prepared) == 1
|
||||
|
||||
await service.index(prepared[0], doc, llm=mocker.Mock())
|
||||
await service.index(prepared[0], doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.search_space_id == space_id)
|
||||
|
|
@ -66,7 +64,7 @@ async def test_dropbox_pipeline_creates_ready_document(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_dropbox_duplicate_content_skipped(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
@ -86,7 +84,7 @@ async def test_dropbox_duplicate_content_skipped(
|
|||
|
||||
prepared = await service.prepare_for_indexing([doc])
|
||||
assert len(prepared) == 1
|
||||
await service.index(prepared[0], doc, llm=mocker.Mock())
|
||||
await service.index(prepared[0], doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.search_space_id == space_id)
|
||||
|
|
|
|||
|
|
@ -28,8 +28,6 @@ def _gmail_doc(
|
|||
search_space_id=search_space_id,
|
||||
connector_id=connector_id,
|
||||
created_by_id=user_id,
|
||||
should_summarize=True,
|
||||
fallback_summary=f"Gmail: Subject for {unique_id}",
|
||||
metadata={
|
||||
"message_id": unique_id,
|
||||
"from": "sender@example.com",
|
||||
|
|
@ -39,7 +37,7 @@ def _gmail_doc(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_gmail_pipeline_creates_ready_document(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
@ -57,7 +55,7 @@ async def test_gmail_pipeline_creates_ready_document(
|
|||
prepared = await service.prepare_for_indexing([doc])
|
||||
assert len(prepared) == 1
|
||||
|
||||
await service.index(prepared[0], doc, llm=mocker.Mock())
|
||||
await service.index(prepared[0], doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.search_space_id == space_id)
|
||||
|
|
@ -71,7 +69,7 @@ async def test_gmail_pipeline_creates_ready_document(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_gmail_legacy_doc_migrated_then_reused(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ pytestmark = pytest.mark.integration
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_index_batch_creates_ready_documents(
|
||||
db_session, db_search_space, make_connector_document, mocker
|
||||
|
|
@ -33,7 +33,7 @@ async def test_index_batch_creates_ready_documents(
|
|||
]
|
||||
|
||||
service = IndexingPipelineService(session=db_session)
|
||||
results = await service.index_batch(docs, llm=mocker.Mock())
|
||||
results = await service.index_batch(docs)
|
||||
|
||||
assert len(results) == 2
|
||||
|
||||
|
|
@ -50,10 +50,10 @@ async def test_index_batch_creates_ready_documents(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_index_batch_empty_returns_empty(db_session, mocker):
|
||||
"""index_batch with empty input returns an empty list."""
|
||||
service = IndexingPipelineService(session=db_session)
|
||||
results = await service.index_batch([], llm=mocker.Mock())
|
||||
results = await service.index_batch([])
|
||||
assert results == []
|
||||
|
|
|
|||
|
|
@ -10,9 +10,7 @@ _EMBEDDING_DIM = app_config.embedding_model_instance.dimension
|
|||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_sets_status_ready(
|
||||
db_session,
|
||||
db_search_space,
|
||||
|
|
@ -27,7 +25,7 @@ async def test_sets_status_ready(
|
|||
document = prepared[0]
|
||||
document_id = document.id
|
||||
|
||||
await service.index(document, connector_doc, llm=mocker.Mock())
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
|
|
@ -37,16 +35,14 @@ async def test_sets_status_ready(
|
|||
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_content_is_summary_when_should_summarize_true(
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_content_is_source_markdown_by_default(
|
||||
db_session,
|
||||
db_search_space,
|
||||
make_connector_document,
|
||||
mocker,
|
||||
):
|
||||
"""Document content is set to the LLM-generated summary when should_summarize=True."""
|
||||
"""Document content is set to source_markdown by default."""
|
||||
connector_doc = make_connector_document(search_space_id=db_search_space.id)
|
||||
service = IndexingPipelineService(session=db_session)
|
||||
|
||||
|
|
@ -54,28 +50,25 @@ async def test_content_is_summary_when_should_summarize_true(
|
|||
document = prepared[0]
|
||||
document_id = document.id
|
||||
|
||||
await service.index(document, connector_doc, llm=mocker.Mock())
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
)
|
||||
reloaded = result.scalars().first()
|
||||
|
||||
assert reloaded.content == "Mocked summary."
|
||||
assert reloaded.content == connector_doc.source_markdown
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_content_is_source_markdown_when_should_summarize_false(
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_content_is_source_markdown_when_custom_content(
|
||||
db_session,
|
||||
db_search_space,
|
||||
make_connector_document,
|
||||
):
|
||||
"""Document content is set to source_markdown verbatim when should_summarize=False."""
|
||||
"""Document content is set to source_markdown verbatim."""
|
||||
connector_doc = make_connector_document(
|
||||
search_space_id=db_search_space.id,
|
||||
should_summarize=False,
|
||||
source_markdown="## Raw content",
|
||||
)
|
||||
service = IndexingPipelineService(session=db_session)
|
||||
|
|
@ -84,7 +77,7 @@ async def test_content_is_source_markdown_when_should_summarize_false(
|
|||
document = prepared[0]
|
||||
document_id = document.id
|
||||
|
||||
await service.index(document, connector_doc, llm=None)
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
|
|
@ -94,9 +87,7 @@ async def test_content_is_source_markdown_when_should_summarize_false(
|
|||
assert reloaded.content == "## Raw content"
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_chunks_written_to_db(
|
||||
db_session,
|
||||
db_search_space,
|
||||
|
|
@ -111,7 +102,7 @@ async def test_chunks_written_to_db(
|
|||
document = prepared[0]
|
||||
document_id = document.id
|
||||
|
||||
await service.index(document, connector_doc, llm=mocker.Mock())
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Chunk).filter(Chunk.document_id == document_id)
|
||||
|
|
@ -122,9 +113,7 @@ async def test_chunks_written_to_db(
|
|||
assert chunks[0].content == "Test chunk content."
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_embedding_written_to_db(
|
||||
db_session,
|
||||
db_search_space,
|
||||
|
|
@ -139,7 +128,7 @@ async def test_embedding_written_to_db(
|
|||
document = prepared[0]
|
||||
document_id = document.id
|
||||
|
||||
await service.index(document, connector_doc, llm=mocker.Mock())
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
|
|
@ -150,9 +139,7 @@ async def test_embedding_written_to_db(
|
|||
assert len(reloaded.embedding) == _EMBEDDING_DIM
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_updated_at_advances_after_indexing(
|
||||
db_session,
|
||||
db_search_space,
|
||||
|
|
@ -172,7 +159,7 @@ async def test_updated_at_advances_after_indexing(
|
|||
)
|
||||
updated_at_pending = result.scalars().first().updated_at
|
||||
|
||||
await service.index(document, connector_doc, llm=mocker.Mock())
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
|
|
@ -182,18 +169,15 @@ async def test_updated_at_advances_after_indexing(
|
|||
assert updated_at_ready > updated_at_pending
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_no_llm_falls_back_to_source_markdown(
|
||||
db_session,
|
||||
db_search_space,
|
||||
make_connector_document,
|
||||
):
|
||||
"""When llm=None and no fallback_summary, content falls back to source_markdown."""
|
||||
"""Content stays deterministic source markdown without an LLM."""
|
||||
connector_doc = make_connector_document(
|
||||
search_space_id=db_search_space.id,
|
||||
should_summarize=True,
|
||||
source_markdown="## Fallback content",
|
||||
)
|
||||
service = IndexingPipelineService(session=db_session)
|
||||
|
|
@ -202,7 +186,7 @@ async def test_no_llm_falls_back_to_source_markdown(
|
|||
document = prepared[0]
|
||||
document_id = document.id
|
||||
|
||||
await service.index(document, connector_doc, llm=None)
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
|
|
@ -213,27 +197,23 @@ async def test_no_llm_falls_back_to_source_markdown(
|
|||
assert reloaded.content == "## Fallback content"
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_fallback_summary_used_when_llm_unavailable(
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_source_markdown_used_without_preview(
|
||||
db_session,
|
||||
db_search_space,
|
||||
make_connector_document,
|
||||
):
|
||||
"""fallback_summary is used as content when llm=None and should_summarize=True."""
|
||||
"""Source markdown is used without fallback preview fields."""
|
||||
connector_doc = make_connector_document(
|
||||
search_space_id=db_search_space.id,
|
||||
should_summarize=True,
|
||||
source_markdown="## Full raw content",
|
||||
fallback_summary="Short pre-built summary.",
|
||||
)
|
||||
service = IndexingPipelineService(session=db_session)
|
||||
|
||||
prepared = await service.prepare_for_indexing([connector_doc])
|
||||
document_id = prepared[0].id
|
||||
|
||||
await service.index(prepared[0], connector_doc, llm=None)
|
||||
await service.index(prepared[0], connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
|
|
@ -241,12 +221,10 @@ async def test_fallback_summary_used_when_llm_unavailable(
|
|||
reloaded = result.scalars().first()
|
||||
|
||||
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY)
|
||||
assert reloaded.content == "Short pre-built summary."
|
||||
assert reloaded.content == "## Full raw content"
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||
async def test_reindex_replaces_old_chunks(
|
||||
db_session,
|
||||
db_search_space,
|
||||
|
|
@ -264,14 +242,14 @@ async def test_reindex_replaces_old_chunks(
|
|||
document = prepared[0]
|
||||
document_id = document.id
|
||||
|
||||
await service.index(document, connector_doc, llm=mocker.Mock())
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
updated_doc = make_connector_document(
|
||||
search_space_id=db_search_space.id,
|
||||
source_markdown="## v2",
|
||||
)
|
||||
re_prepared = await service.prepare_for_indexing([updated_doc])
|
||||
await service.index(re_prepared[0], updated_doc, llm=mocker.Mock())
|
||||
await service.index(re_prepared[0], updated_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Chunk).filter(Chunk.document_id == document_id)
|
||||
|
|
@ -281,16 +259,14 @@ async def test_reindex_replaces_old_chunks(
|
|||
assert len(chunks) == 1
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_llm_error_sets_status_failed(
|
||||
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
|
||||
async def test_embedding_error_sets_status_failed(
|
||||
db_session,
|
||||
db_search_space,
|
||||
make_connector_document,
|
||||
mocker,
|
||||
):
|
||||
"""Document status is FAILED when the LLM raises during indexing."""
|
||||
"""Document status is FAILED when embedding raises during indexing."""
|
||||
connector_doc = make_connector_document(search_space_id=db_search_space.id)
|
||||
service = IndexingPipelineService(session=db_session)
|
||||
|
||||
|
|
@ -298,7 +274,7 @@ async def test_llm_error_sets_status_failed(
|
|||
document = prepared[0]
|
||||
document_id = document.id
|
||||
|
||||
await service.index(document, connector_doc, llm=mocker.Mock())
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
|
|
@ -308,10 +284,8 @@ async def test_llm_error_sets_status_failed(
|
|||
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.FAILED)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_llm_error_leaves_no_partial_data(
|
||||
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
|
||||
async def test_embedding_error_leaves_no_partial_data(
|
||||
db_session,
|
||||
db_search_space,
|
||||
make_connector_document,
|
||||
|
|
@ -325,7 +299,7 @@ async def test_llm_error_leaves_no_partial_data(
|
|||
document = prepared[0]
|
||||
document_id = document.id
|
||||
|
||||
await service.index(document, connector_doc, llm=mocker.Mock())
|
||||
await service.index(document, connector_doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
|
|
|
|||
|
|
@ -21,7 +21,6 @@ from app.db import (
|
|||
pytestmark = pytest.mark.integration
|
||||
|
||||
UNIFIED_FIXTURES = (
|
||||
"patched_summarize",
|
||||
"patched_embed_texts",
|
||||
"patched_chunk_text",
|
||||
)
|
||||
|
|
@ -787,7 +786,7 @@ class TestPipelineIntegration:
|
|||
assert len(prepared) == 1
|
||||
|
||||
db_doc = prepared[0]
|
||||
result = await service.index(db_doc, doc, llm=mocker.Mock())
|
||||
result = await service.index(db_doc, doc)
|
||||
assert result is not None
|
||||
|
||||
docs = (
|
||||
|
|
@ -1272,7 +1271,7 @@ class TestIndexingProgressFlag:
|
|||
original_index = IndexingPipelineService.index
|
||||
flag_observed = []
|
||||
|
||||
async def patched_index(self_pipe, document, connector_doc, llm):
|
||||
async def patched_index(self_pipe, document, connector_doc):
|
||||
folder = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(
|
||||
|
|
@ -1284,7 +1283,7 @@ class TestIndexingProgressFlag:
|
|||
if folder:
|
||||
meta = folder.folder_metadata or {}
|
||||
flag_observed.append(meta.get("indexing_in_progress", False))
|
||||
return await original_index(self_pipe, document, connector_doc, llm)
|
||||
return await original_index(self_pipe, document, connector_doc)
|
||||
|
||||
IndexingPipelineService.index = patched_index
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -24,8 +24,6 @@ def _onedrive_doc(
|
|||
search_space_id=search_space_id,
|
||||
connector_id=connector_id,
|
||||
created_by_id=user_id,
|
||||
should_summarize=True,
|
||||
fallback_summary=f"File: {unique_id}.docx",
|
||||
metadata={
|
||||
"onedrive_file_id": unique_id,
|
||||
"onedrive_file_name": f"{unique_id}.docx",
|
||||
|
|
@ -35,7 +33,7 @@ def _onedrive_doc(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_onedrive_pipeline_creates_ready_document(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
@ -53,7 +51,7 @@ async def test_onedrive_pipeline_creates_ready_document(
|
|||
prepared = await service.prepare_for_indexing([doc])
|
||||
assert len(prepared) == 1
|
||||
|
||||
await service.index(prepared[0], doc, llm=mocker.Mock())
|
||||
await service.index(prepared[0], doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.search_space_id == space_id)
|
||||
|
|
@ -66,7 +64,7 @@ async def test_onedrive_pipeline_creates_ready_document(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_onedrive_duplicate_content_skipped(
|
||||
db_session, db_search_space, db_connector, db_user, mocker
|
||||
|
|
@ -86,7 +84,7 @@ async def test_onedrive_duplicate_content_skipped(
|
|||
|
||||
prepared = await service.prepare_for_indexing([doc])
|
||||
assert len(prepared) == 1
|
||||
await service.index(prepared[0], doc, llm=mocker.Mock())
|
||||
await service.index(prepared[0], doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.search_space_id == space_id)
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ async def test_new_document_is_persisted_with_pending_status(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_unchanged_ready_document_is_skipped(
|
||||
db_session,
|
||||
|
|
@ -47,7 +47,7 @@ async def test_unchanged_ready_document_is_skipped(
|
|||
|
||||
# Index fully so the document reaches ready state
|
||||
prepared = await service.prepare_for_indexing([doc])
|
||||
await service.index(prepared[0], doc, llm=mocker.Mock())
|
||||
await service.index(prepared[0], doc)
|
||||
|
||||
# Same content on the next run — a ready document must be skipped
|
||||
results = await service.prepare_for_indexing([doc])
|
||||
|
|
@ -56,7 +56,7 @@ async def test_unchanged_ready_document_is_skipped(
|
|||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text"
|
||||
"patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
async def test_title_only_change_updates_title_in_db(
|
||||
db_session,
|
||||
|
|
@ -72,7 +72,7 @@ async def test_title_only_change_updates_title_in_db(
|
|||
|
||||
prepared = await service.prepare_for_indexing([original])
|
||||
document_id = prepared[0].id
|
||||
await service.index(prepared[0], original, llm=mocker.Mock())
|
||||
await service.index(prepared[0], original)
|
||||
|
||||
renamed = make_connector_document(
|
||||
search_space_id=db_search_space.id, title="Updated Title"
|
||||
|
|
@ -338,9 +338,7 @@ async def test_same_content_from_different_source_is_skipped(
|
|||
assert len(result.scalars().all()) == 1
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
||||
)
|
||||
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
|
||||
async def test_failed_document_with_unchanged_content_is_requeued(
|
||||
db_session,
|
||||
db_search_space,
|
||||
|
|
@ -351,10 +349,10 @@ async def test_failed_document_with_unchanged_content_is_requeued(
|
|||
doc = make_connector_document(search_space_id=db_search_space.id)
|
||||
service = IndexingPipelineService(session=db_session)
|
||||
|
||||
# First run: document is created and indexing crashes → status = failed
|
||||
# First run: document is created and indexing crashes, so status becomes failed.
|
||||
prepared = await service.prepare_for_indexing([doc])
|
||||
document_id = prepared[0].id
|
||||
await service.index(prepared[0], doc, llm=mocker.Mock())
|
||||
await service.index(prepared[0], doc)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.id == document_id)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue