diff --git a/surfsense_backend/app/agents/new_chat/sandbox.py b/surfsense_backend/app/agents/new_chat/sandbox.py index 24b380b0b..3814a5208 100644 --- a/surfsense_backend/app/agents/new_chat/sandbox.py +++ b/surfsense_backend/app/agents/new_chat/sandbox.py @@ -147,7 +147,9 @@ async def delete_sandbox(thread_id: int | str) -> None: try: sandbox = client.find_one(labels=labels) except DaytonaError: - logger.debug("No sandbox to delete for thread %s (already removed)", thread_id) + logger.debug( + "No sandbox to delete for thread %s (already removed)", thread_id + ) return try: client.delete(sandbox) @@ -166,6 +168,7 @@ async def delete_sandbox(thread_id: int | str) -> None: # Local file persistence # --------------------------------------------------------------------------- + def _get_sandbox_files_dir() -> Path: return Path(os.environ.get("SANDBOX_FILES_DIR", "sandbox_files")) diff --git a/surfsense_backend/app/indexing_pipeline/connector_document.py b/surfsense_backend/app/indexing_pipeline/connector_document.py index ecd47bab2..019efe287 100644 --- a/surfsense_backend/app/indexing_pipeline/connector_document.py +++ b/surfsense_backend/app/indexing_pipeline/connector_document.py @@ -5,6 +5,7 @@ from app.db import DocumentType class ConnectorDocument(BaseModel): """Canonical data transfer object produced by connector adapters and consumed by the indexing pipeline.""" + title: str source_markdown: str unique_id: str diff --git a/surfsense_backend/app/indexing_pipeline/document_chunker.py b/surfsense_backend/app/indexing_pipeline/document_chunker.py index 719c9f4bb..4f3c698ef 100644 --- a/surfsense_backend/app/indexing_pipeline/document_chunker.py +++ b/surfsense_backend/app/indexing_pipeline/document_chunker.py @@ -3,5 +3,7 @@ from app.config import config def chunk_text(text: str, use_code_chunker: bool = False) -> list[str]: """Chunk a text string using the configured chunker and return the chunk texts.""" - chunker = config.code_chunker_instance if use_code_chunker else config.chunker_instance + chunker = ( + config.code_chunker_instance if use_code_chunker else config.chunker_instance + ) return [c.text for c in chunker.chunk(text)] diff --git a/surfsense_backend/app/indexing_pipeline/document_summarizer.py b/surfsense_backend/app/indexing_pipeline/document_summarizer.py index 1e708075e..76cc77377 100644 --- a/surfsense_backend/app/indexing_pipeline/document_summarizer.py +++ b/surfsense_backend/app/indexing_pipeline/document_summarizer.py @@ -2,7 +2,9 @@ from app.prompts import SUMMARY_PROMPT_TEMPLATE from app.utils.document_converters import optimize_content_for_context_window -async def summarize_document(source_markdown: str, llm, metadata: dict | None = None) -> str: +async def summarize_document( + source_markdown: str, llm, metadata: dict | None = None +) -> str: """Generate a text summary of a document using an LLM, prefixed with metadata when provided.""" model_name = getattr(llm, "model", "gpt-3.5-turbo") optimized_content = optimize_content_for_context_window( diff --git a/surfsense_backend/app/indexing_pipeline/exceptions.py b/surfsense_backend/app/indexing_pipeline/exceptions.py index 8c9c6f2d5..2a86dc76e 100644 --- a/surfsense_backend/app/indexing_pipeline/exceptions.py +++ b/surfsense_backend/app/indexing_pipeline/exceptions.py @@ -12,7 +12,6 @@ from litellm.exceptions import ( Timeout, UnprocessableEntityError, ) -from sqlalchemy.exc import IntegrityError # Tuples for use directly in except clauses. RETRYABLE_LLM_ERRORS = ( @@ -36,29 +35,33 @@ PERMANENT_LLM_ERRORS = ( # (LiteLLMEmbeddings, CohereEmbeddings, GeminiEmbeddings all normalize to RuntimeError). EMBEDDING_ERRORS = ( RuntimeError, # local device failure or API backend normalization - OSError, # model files missing or corrupted (local backends) - MemoryError, # document too large for available RAM + OSError, # model files missing or corrupted (local backends) + MemoryError, # document too large for available RAM ) class PipelineMessages: - RATE_LIMIT = "LLM rate limit exceeded. Will retry on next sync." - LLM_TIMEOUT = "LLM request timed out. Will retry on next sync." - LLM_UNAVAILABLE = "LLM service temporarily unavailable. Will retry on next sync." - LLM_BAD_GATEWAY = "LLM gateway error. Will retry on next sync." - LLM_SERVER_ERROR = "LLM internal server error. Will retry on next sync." - LLM_CONNECTION = "Could not reach the LLM service. Check network connectivity." + RATE_LIMIT = "LLM rate limit exceeded. Will retry on next sync." + LLM_TIMEOUT = "LLM request timed out. Will retry on next sync." + LLM_UNAVAILABLE = "LLM service temporarily unavailable. Will retry on next sync." + LLM_BAD_GATEWAY = "LLM gateway error. Will retry on next sync." + LLM_SERVER_ERROR = "LLM internal server error. Will retry on next sync." + LLM_CONNECTION = "Could not reach the LLM service. Check network connectivity." - LLM_AUTH = "LLM authentication failed. Check your API key." - LLM_PERMISSION = "LLM request denied. Check your account permissions." - LLM_NOT_FOUND = "LLM model not found. Check your model configuration." - LLM_BAD_REQUEST = "LLM rejected the request. Document content may be invalid." - LLM_UNPROCESSABLE = "Document exceeds the LLM context window even after optimization." - LLM_RESPONSE = "LLM returned an invalid response." + LLM_AUTH = "LLM authentication failed. Check your API key." + LLM_PERMISSION = "LLM request denied. Check your account permissions." + LLM_NOT_FOUND = "LLM model not found. Check your model configuration." + LLM_BAD_REQUEST = "LLM rejected the request. Document content may be invalid." + LLM_UNPROCESSABLE = ( + "Document exceeds the LLM context window even after optimization." + ) + LLM_RESPONSE = "LLM returned an invalid response." - EMBEDDING_FAILED = "Embedding failed. Check your embedding model configuration or service." - EMBEDDING_MODEL = "Embedding model files are missing or corrupted." - EMBEDDING_MEMORY = "Not enough memory to embed this document." + EMBEDDING_FAILED = ( + "Embedding failed. Check your embedding model configuration or service." + ) + EMBEDDING_MODEL = "Embedding model files are missing or corrupted." + EMBEDDING_MEMORY = "Not enough memory to embed this document." CHUNKING_OVERFLOW = "Document structure is too deeply nested to chunk." diff --git a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py index 7774a7347..eea3d6e25 100644 --- a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py +++ b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py @@ -2,6 +2,7 @@ import contextlib from datetime import UTC, datetime from sqlalchemy import delete, select +from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import AsyncSession from app.db import Chunk, Document, DocumentStatus @@ -21,7 +22,6 @@ from app.indexing_pipeline.exceptions import ( EMBEDDING_ERRORS, PERMANENT_LLM_ERRORS, RETRYABLE_LLM_ERRORS, - IntegrityError, PipelineMessages, embedding_message, llm_permanent_message, diff --git a/surfsense_backend/app/indexing_pipeline/pipeline_logger.py b/surfsense_backend/app/indexing_pipeline/pipeline_logger.py index 6571920cf..281a92c52 100644 --- a/surfsense_backend/app/indexing_pipeline/pipeline_logger.py +++ b/surfsense_backend/app/indexing_pipeline/pipeline_logger.py @@ -8,27 +8,29 @@ logger = logging.getLogger(__name__) class PipelineLogContext: connector_id: int | None search_space_id: int - unique_id: str # always available from ConnectorDocument - doc_id: int | None = None # set once the DB row exists (index phase only) + unique_id: str # always available from ConnectorDocument + doc_id: int | None = None # set once the DB row exists (index phase only) class LogMessages: # prepare_for_indexing - DOCUMENT_QUEUED = "New document queued for indexing." - DOCUMENT_UPDATED = "Document content changed, re-queued for indexing." - DOCUMENT_REQUEUED = "Stuck document re-queued for indexing." + DOCUMENT_QUEUED = "New document queued for indexing." + DOCUMENT_UPDATED = "Document content changed, re-queued for indexing." + DOCUMENT_REQUEUED = "Stuck document re-queued for indexing." DOC_SKIPPED_UNKNOWN = "Unexpected error — document skipped." - BATCH_ABORTED = "Fatal DB error — aborting prepare batch." - RACE_CONDITION = "Concurrent worker beat us to the commit — rolling back batch." + BATCH_ABORTED = "Fatal DB error — aborting prepare batch." + RACE_CONDITION = "Concurrent worker beat us to the commit — rolling back batch." # index - INDEX_STARTED = "Document indexing started." - INDEX_SUCCESS = "Document indexed successfully." - LLM_RETRYABLE = "Retryable LLM error — document marked failed, will retry on next sync." - LLM_PERMANENT = "Permanent LLM error — document marked failed." - EMBEDDING_FAILED = "Embedding error — document marked failed." - CHUNKING_OVERFLOW = "Chunking overflow — document marked failed." - UNEXPECTED = "Unexpected error — document marked failed." + INDEX_STARTED = "Document indexing started." + INDEX_SUCCESS = "Document indexed successfully." + LLM_RETRYABLE = ( + "Retryable LLM error — document marked failed, will retry on next sync." + ) + LLM_PERMANENT = "Permanent LLM error — document marked failed." + EMBEDDING_FAILED = "Embedding error — document marked failed." + CHUNKING_OVERFLOW = "Chunking overflow — document marked failed." + UNEXPECTED = "Unexpected error — document marked failed." def _format_context(ctx: PipelineLogContext) -> str: @@ -52,7 +54,9 @@ def _build_message(msg: str, ctx: PipelineLogContext, **extra) -> str: return msg -def _safe_log(level_fn, msg: str, ctx: PipelineLogContext, exc_info=None, **extra) -> None: +def _safe_log( + level_fn, msg: str, ctx: PipelineLogContext, exc_info=None, **extra +) -> None: # Logging must never raise — a broken log call inside an except block would # chain with the original exception and mask it entirely. try: @@ -64,6 +68,7 @@ def _safe_log(level_fn, msg: str, ctx: PipelineLogContext, exc_info=None, **extr # ── prepare_for_indexing ────────────────────────────────────────────────────── + def log_document_queued(ctx: PipelineLogContext) -> None: _safe_log(logger.info, LogMessages.DOCUMENT_QUEUED, ctx) @@ -77,7 +82,9 @@ def log_document_requeued(ctx: PipelineLogContext) -> None: def log_doc_skipped_unknown(ctx: PipelineLogContext, exc: Exception) -> None: - _safe_log(logger.warning, LogMessages.DOC_SKIPPED_UNKNOWN, ctx, exc_info=exc, error=exc) + _safe_log( + logger.warning, LogMessages.DOC_SKIPPED_UNKNOWN, ctx, exc_info=exc, error=exc + ) def log_race_condition(ctx: PipelineLogContext) -> None: @@ -90,6 +97,7 @@ def log_batch_aborted(ctx: PipelineLogContext, exc: Exception) -> None: # ── index ───────────────────────────────────────────────────────────────────── + def log_index_started(ctx: PipelineLogContext) -> None: _safe_log(logger.info, LogMessages.INDEX_STARTED, ctx) diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 7856a2c17..28e02627c 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -10,6 +10,8 @@ These endpoints support the ThreadHistoryAdapter pattern from assistant-ui: - POST /threads/{thread_id}/messages - Append message """ +import asyncio +import logging from datetime import UTC, datetime from fastapi import APIRouter, Depends, HTTPException, Request @@ -52,9 +54,6 @@ from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat from app.users import current_active_user from app.utils.rbac import check_permission -import asyncio -import logging - _logger = logging.getLogger(__name__) router = APIRouter() @@ -75,11 +74,19 @@ def _try_delete_sandbox(thread_id: int) -> None: try: await delete_sandbox(thread_id) except Exception: - _logger.warning("Background sandbox delete failed for thread %s", thread_id, exc_info=True) + _logger.warning( + "Background sandbox delete failed for thread %s", + thread_id, + exc_info=True, + ) try: delete_local_sandbox_files(thread_id) except Exception: - _logger.warning("Local sandbox file cleanup failed for thread %s", thread_id, exc_info=True) + _logger.warning( + "Local sandbox file cleanup failed for thread %s", + thread_id, + exc_info=True, + ) try: loop = asyncio.get_running_loop() diff --git a/surfsense_backend/app/routes/sandbox_routes.py b/surfsense_backend/app/routes/sandbox_routes.py index e5b737371..2c12c3a1e 100644 --- a/surfsense_backend/app/routes/sandbox_routes.py +++ b/surfsense_backend/app/routes/sandbox_routes.py @@ -87,7 +87,7 @@ async def download_sandbox_file( # Fall back to live sandbox download try: sandbox = await get_or_create_sandbox(thread_id) - raw_sandbox = sandbox._sandbox # noqa: SLF001 + raw_sandbox = sandbox._sandbox content: bytes = await asyncio.to_thread(raw_sandbox.fs.download_file, path) except Exception as exc: logger.warning("Sandbox file download failed for %s: %s", path, exc) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index bf942f548..a1d0b5cec 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -877,7 +877,9 @@ async def _stream_agent_events( output_text = om.group(1) if om else "" thread_id_str = config.get("configurable", {}).get("thread_id", "") - for sf_match in re.finditer(r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE): + for sf_match in re.finditer( + r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE + ): fpath = sf_match.group(1).strip() if fpath and fpath not in result.sandbox_files: result.sandbox_files.append(fpath) @@ -963,7 +965,10 @@ def _try_persist_and_delete_sandbox( sandbox_files: list[str], ) -> None: """Fire-and-forget: persist sandbox files locally then delete the sandbox.""" - from app.agents.new_chat.sandbox import is_sandbox_enabled, persist_and_delete_sandbox + from app.agents.new_chat.sandbox import ( + is_sandbox_enabled, + persist_and_delete_sandbox, + ) if not is_sandbox_enabled(): return diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 38db67c60..181c2fb31 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -1886,11 +1886,11 @@ async def process_file_in_background_with_document( await task_logger.log_task_success( log_entry, f"Successfully processed file: {filename}", - { + { "document_id": doc_id, "content_hash": content_hash, "file_type": etl_service, - }, + }, ) return document diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 616f76fd5..36dc4e7c1 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -179,11 +179,7 @@ addopts = "-v --tb=short -x --strict-markers -ra --durations=5" markers = [ "unit: pure logic tests, no DB or external services", "integration: tests that require a real PostgreSQL database", - "document: document upload and processing tests", - "connector: connector indexing tests", - "chat: chat and agent tests", - "page_limit: page limit enforcement tests", - "upload_limit: file upload limit validation tests", + "e2e: tests requiring a running backend and real HTTP calls" ] filterwarnings = [ "ignore::UserWarning:chonkie", diff --git a/surfsense_backend/tests/conftest.py b/surfsense_backend/tests/conftest.py index 54a3b176f..2c8e81fa3 100644 --- a/surfsense_backend/tests/conftest.py +++ b/surfsense_backend/tests/conftest.py @@ -46,6 +46,7 @@ def make_connector_document(): Generic factory for unit tests. Overridden in tests/integration/conftest.py with real DB-backed IDs for integration tests. """ + def _make(**overrides): defaults = { "title": "Test Document", @@ -58,4 +59,5 @@ def make_connector_document(): } defaults.update(overrides) return ConnectorDocument(**defaults) + return _make diff --git a/surfsense_backend/tests/e2e/conftest.py b/surfsense_backend/tests/e2e/conftest.py index 6aca593e8..4c418612c 100644 --- a/surfsense_backend/tests/e2e/conftest.py +++ b/surfsense_backend/tests/e2e/conftest.py @@ -18,7 +18,6 @@ from tests.utils.helpers import ( get_search_space_id, ) - # --------------------------------------------------------------------------- # Backend connectivity fixtures # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/e2e/test_document_upload.py b/surfsense_backend/tests/e2e/test_document_upload.py index c1193a63b..f89295d5f 100644 --- a/surfsense_backend/tests/e2e/test_document_upload.py +++ b/surfsense_backend/tests/e2e/test_document_upload.py @@ -28,7 +28,7 @@ from tests.utils.helpers import ( upload_multiple_files, ) -pytestmark = pytest.mark.document +pytestmark = pytest.mark.e2e # --------------------------------------------------------------------------- # Helpers local to this module diff --git a/surfsense_backend/tests/e2e/test_page_limits.py b/surfsense_backend/tests/e2e/test_page_limits.py index 6d54bcb09..092772ceb 100644 --- a/surfsense_backend/tests/e2e/test_page_limits.py +++ b/surfsense_backend/tests/e2e/test_page_limits.py @@ -31,7 +31,7 @@ from tests.utils.helpers import ( upload_file, ) -pytestmark = pytest.mark.page_limit +pytestmark = pytest.mark.e2e # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/e2e/test_upload_limits.py b/surfsense_backend/tests/e2e/test_upload_limits.py index b81e701e6..9368b0f43 100644 --- a/surfsense_backend/tests/e2e/test_upload_limits.py +++ b/surfsense_backend/tests/e2e/test_upload_limits.py @@ -21,7 +21,7 @@ import io import httpx import pytest -pytestmark = pytest.mark.upload_limit +pytestmark = pytest.mark.e2e # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py index 99e182c6b..119045d29 100644 --- a/surfsense_backend/tests/integration/conftest.py +++ b/surfsense_backend/tests/integration/conftest.py @@ -1,4 +1,3 @@ - import os import uuid from unittest.mock import AsyncMock, MagicMock @@ -9,14 +8,21 @@ from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine from sqlalchemy.pool import NullPool -from app.db import Base, SearchSpace, SearchSourceConnector, SearchSourceConnectorType -from app.db import User -from app.db import DocumentType +from app.db import ( + Base, + DocumentType, + SearchSourceConnector, + SearchSourceConnectorType, + SearchSpace, + User, +) from app.indexing_pipeline.connector_document import ConnectorDocument _EMBEDDING_DIM = 1024 # must match the Vector() dimension used in DB column creation -_DEFAULT_TEST_DB = "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense_test" +_DEFAULT_TEST_DB = ( + "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense_test" +) TEST_DATABASE_URL = os.environ.get("TEST_DATABASE_URL", _DEFAULT_TEST_DB) @@ -80,7 +86,9 @@ async def db_user(db_session: AsyncSession) -> User: @pytest_asyncio.fixture -async def db_connector(db_session: AsyncSession, db_user: User, db_search_space: "SearchSpace") -> SearchSourceConnector: +async def db_connector( + db_session: AsyncSession, db_user: User, db_search_space: "SearchSpace" +) -> SearchSourceConnector: connector = SearchSourceConnector( name="Test Connector", connector_type=SearchSourceConnectorType.CLICKUP_CONNECTOR, @@ -147,6 +155,7 @@ def patched_chunk_text(monkeypatch) -> MagicMock: @pytest.fixture def make_connector_document(db_connector, db_user): """Integration-scoped override: uses real DB connector and user IDs.""" + def _make(**overrides): defaults = { "title": "Test Document", @@ -159,6 +168,5 @@ def make_connector_document(db_connector, db_user): } defaults.update(overrides) return ConnectorDocument(**defaults) + return _make - - diff --git a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py index 723c0e13b..193e4bd80 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py @@ -7,7 +7,9 @@ from app.indexing_pipeline.adapters.file_upload_adapter import index_uploaded_fi pytestmark = pytest.mark.integration -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_sets_status_ready(db_session, db_search_space, db_user, mocker): """Document status is READY after successful indexing.""" await index_uploaded_file( @@ -28,7 +30,9 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker): assert DocumentStatus.is_state(document.status, DocumentStatus.READY) -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_content_is_summary(db_session, db_search_space, db_user, mocker): """Document content is set to the LLM-generated summary.""" await index_uploaded_file( @@ -49,7 +53,9 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker): assert document.content == "Mocked summary." -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker): """Chunks derived from the source markdown are persisted in the DB.""" await index_uploaded_file( @@ -76,7 +82,9 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker assert chunks[0].content == "Test chunk content." -@pytest.mark.usefixtures("patched_summarize_raises", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize_raises", "patched_embed_text", "patched_chunk_text" +) async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker): """RuntimeError is raised when the indexing step fails so the caller can fire a failure notification.""" with pytest.raises(RuntimeError): diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py index 7c5e1e4f4..0065a03e1 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py @@ -7,9 +7,14 @@ from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineServ pytestmark = pytest.mark.integration -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_sets_status_ready( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """Document status is READY after successful indexing.""" connector_doc = make_connector_document(search_space_id=db_search_space.id) @@ -21,15 +26,22 @@ async def test_sets_status_ready( await service.index(document, connector_doc, llm=mocker.Mock()) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY) -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_content_is_summary_when_should_summarize_true( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """Document content is set to the LLM-generated summary when should_summarize=True.""" connector_doc = make_connector_document(search_space_id=db_search_space.id) @@ -41,15 +53,21 @@ async def test_content_is_summary_when_should_summarize_true( await service.index(document, connector_doc, llm=mocker.Mock()) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert reloaded.content == "Mocked summary." -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_content_is_source_markdown_when_should_summarize_false( - db_session, db_search_space, make_connector_document, + db_session, + db_search_space, + make_connector_document, ): """Document content is set to source_markdown verbatim when should_summarize=False.""" connector_doc = make_connector_document( @@ -65,15 +83,22 @@ async def test_content_is_source_markdown_when_should_summarize_false( await service.index(document, connector_doc, llm=None) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert reloaded.content == "## Raw content" -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_chunks_written_to_db( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """Chunks derived from source_markdown are persisted in the DB.""" connector_doc = make_connector_document(search_space_id=db_search_space.id) @@ -94,9 +119,14 @@ async def test_chunks_written_to_db( assert chunks[0].content == "Test chunk content." -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_embedding_written_to_db( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """Document embedding vector is persisted in the DB after indexing.""" connector_doc = make_connector_document(search_space_id=db_search_space.id) @@ -108,16 +138,23 @@ async def test_embedding_written_to_db( await service.index(document, connector_doc, llm=mocker.Mock()) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert reloaded.embedding is not None assert len(reloaded.embedding) == 1024 -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_updated_at_advances_after_indexing( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """updated_at timestamp is later after indexing than it was at prepare time.""" connector_doc = make_connector_document(search_space_id=db_search_space.id) @@ -127,20 +164,28 @@ async def test_updated_at_advances_after_indexing( document = prepared[0] document_id = document.id - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) updated_at_pending = result.scalars().first().updated_at await service.index(document, connector_doc, llm=mocker.Mock()) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) updated_at_ready = result.scalars().first().updated_at assert updated_at_ready > updated_at_pending -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_no_llm_falls_back_to_source_markdown( - db_session, db_search_space, make_connector_document, + db_session, + db_search_space, + make_connector_document, ): """When llm=None and no fallback_summary, content falls back to source_markdown.""" connector_doc = make_connector_document( @@ -156,16 +201,22 @@ async def test_no_llm_falls_back_to_source_markdown( await service.index(document, connector_doc, llm=None) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY) assert reloaded.content == "## Fallback content" -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_fallback_summary_used_when_llm_unavailable( - db_session, db_search_space, make_connector_document, + db_session, + db_search_space, + make_connector_document, ): """fallback_summary is used as content when llm=None and should_summarize=True.""" connector_doc = make_connector_document( @@ -181,16 +232,23 @@ async def test_fallback_summary_used_when_llm_unavailable( await service.index(prepared[0], connector_doc, llm=None) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY) assert reloaded.content == "Short pre-built summary." -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_reindex_replaces_old_chunks( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """Re-indexing a document replaces its old chunks rather than appending.""" connector_doc = make_connector_document( @@ -220,9 +278,14 @@ async def test_reindex_replaces_old_chunks( assert len(chunks) == 1 -@pytest.mark.usefixtures("patched_summarize_raises", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize_raises", "patched_embed_text", "patched_chunk_text" +) async def test_llm_error_sets_status_failed( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """Document status is FAILED when the LLM raises during indexing.""" connector_doc = make_connector_document(search_space_id=db_search_space.id) @@ -234,15 +297,22 @@ async def test_llm_error_sets_status_failed( await service.index(document, connector_doc, llm=mocker.Mock()) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert DocumentStatus.is_state(reloaded.status, DocumentStatus.FAILED) -@pytest.mark.usefixtures("patched_summarize_raises", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize_raises", "patched_embed_text", "patched_chunk_text" +) async def test_llm_error_leaves_no_partial_data( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """A failed indexing attempt leaves no partial embedding or chunks in the DB.""" connector_doc = make_connector_document(search_space_id=db_search_space.id) @@ -254,7 +324,9 @@ async def test_llm_error_leaves_no_partial_data( await service.index(document, connector_doc, llm=mocker.Mock()) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert reloaded.embedding is None diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py index b6d257f7a..837b02c9f 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py @@ -2,7 +2,9 @@ import pytest from sqlalchemy import select from app.db import Document, DocumentStatus -from app.indexing_pipeline.document_hashing import compute_content_hash as real_compute_content_hash +from app.indexing_pipeline.document_hashing import ( + compute_content_hash as real_compute_content_hash, +) from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService pytestmark = pytest.mark.integration @@ -20,7 +22,9 @@ async def test_new_document_is_persisted_with_pending_status( assert len(results) == 1 document_id = results[0].id - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert reloaded is not None @@ -28,9 +32,14 @@ async def test_new_document_is_persisted_with_pending_status( assert reloaded.source_markdown == doc.source_markdown -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_unchanged_ready_document_is_skipped( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """A READY document with unchanged content is not returned for re-indexing.""" doc = make_connector_document(search_space_id=db_search_space.id) @@ -46,24 +55,35 @@ async def test_unchanged_ready_document_is_skipped( assert results == [] -@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize", "patched_embed_text", "patched_chunk_text" +) async def test_title_only_change_updates_title_in_db( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """A title-only change updates the DB title without re-queuing the document.""" - original = make_connector_document(search_space_id=db_search_space.id, title="Original Title") + original = make_connector_document( + search_space_id=db_search_space.id, title="Original Title" + ) service = IndexingPipelineService(session=db_session) prepared = await service.prepare_for_indexing([original]) document_id = prepared[0].id await service.index(prepared[0], original, llm=mocker.Mock()) - renamed = make_connector_document(search_space_id=db_search_space.id, title="Updated Title") + renamed = make_connector_document( + search_space_id=db_search_space.id, title="Updated Title" + ) results = await service.prepare_for_indexing([renamed]) assert results == [] - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert reloaded.title == "Updated Title" @@ -73,19 +93,25 @@ async def test_changed_content_is_returned_for_reprocessing( db_session, db_search_space, make_connector_document ): """A document with changed content is returned for re-indexing with updated markdown.""" - original = make_connector_document(search_space_id=db_search_space.id, source_markdown="## v1") + original = make_connector_document( + search_space_id=db_search_space.id, source_markdown="## v1" + ) service = IndexingPipelineService(session=db_session) first = await service.prepare_for_indexing([original]) original_id = first[0].id - updated = make_connector_document(search_space_id=db_search_space.id, source_markdown="## v2") + updated = make_connector_document( + search_space_id=db_search_space.id, source_markdown="## v2" + ) results = await service.prepare_for_indexing([updated]) assert len(results) == 1 assert results[0].id == original_id - result = await db_session.execute(select(Document).filter(Document.id == original_id)) + result = await db_session.execute( + select(Document).filter(Document.id == original_id) + ) reloaded = result.scalars().first() assert reloaded.source_markdown == "## v2" @@ -97,9 +123,24 @@ async def test_all_documents_in_batch_are_persisted( ): """All documents in a batch are persisted and returned.""" docs = [ - make_connector_document(search_space_id=db_search_space.id, unique_id="id-1", title="Doc 1", source_markdown="## Content 1"), - make_connector_document(search_space_id=db_search_space.id, unique_id="id-2", title="Doc 2", source_markdown="## Content 2"), - make_connector_document(search_space_id=db_search_space.id, unique_id="id-3", title="Doc 3", source_markdown="## Content 3"), + make_connector_document( + search_space_id=db_search_space.id, + unique_id="id-1", + title="Doc 1", + source_markdown="## Content 1", + ), + make_connector_document( + search_space_id=db_search_space.id, + unique_id="id-2", + title="Doc 2", + source_markdown="## Content 2", + ), + make_connector_document( + search_space_id=db_search_space.id, + unique_id="id-3", + title="Doc 3", + source_markdown="## Content 3", + ), ] service = IndexingPipelineService(session=db_session) @@ -107,7 +148,9 @@ async def test_all_documents_in_batch_are_persisted( assert len(results) == 3 - result = await db_session.execute(select(Document).filter(Document.search_space_id == db_search_space.id)) + result = await db_session.execute( + select(Document).filter(Document.search_space_id == db_search_space.id) + ) rows = result.scalars().all() assert len(rows) == 3 @@ -124,7 +167,9 @@ async def test_duplicate_in_batch_is_persisted_once( assert len(results) == 1 - result = await db_session.execute(select(Document).filter(Document.search_space_id == db_search_space.id)) + result = await db_session.execute( + select(Document).filter(Document.search_space_id == db_search_space.id) + ) rows = result.scalars().all() assert len(rows) == 1 @@ -143,7 +188,9 @@ async def test_created_by_id_is_persisted( results = await service.prepare_for_indexing([doc]) document_id = results[0].id - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert str(reloaded.created_by_id) == str(db_user.id) @@ -170,7 +217,9 @@ async def test_metadata_is_updated_when_content_changes( ) await service.prepare_for_indexing([updated]) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) reloaded = result.scalars().first() assert reloaded.document_metadata == {"status": "done"} @@ -180,19 +229,27 @@ async def test_updated_at_advances_when_title_only_changes( db_session, db_search_space, make_connector_document ): """updated_at advances even when only the title changes.""" - original = make_connector_document(search_space_id=db_search_space.id, title="Old Title") + original = make_connector_document( + search_space_id=db_search_space.id, title="Old Title" + ) service = IndexingPipelineService(session=db_session) first = await service.prepare_for_indexing([original]) document_id = first[0].id - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) updated_at_v1 = result.scalars().first().updated_at - renamed = make_connector_document(search_space_id=db_search_space.id, title="New Title") + renamed = make_connector_document( + search_space_id=db_search_space.id, title="New Title" + ) await service.prepare_for_indexing([renamed]) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) updated_at_v2 = result.scalars().first().updated_at assert updated_at_v2 > updated_at_v1 @@ -202,19 +259,27 @@ async def test_updated_at_advances_when_content_changes( db_session, db_search_space, make_connector_document ): """updated_at advances when document content changes.""" - original = make_connector_document(search_space_id=db_search_space.id, source_markdown="## v1") + original = make_connector_document( + search_space_id=db_search_space.id, source_markdown="## v1" + ) service = IndexingPipelineService(session=db_session) first = await service.prepare_for_indexing([original]) document_id = first[0].id - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) updated_at_v1 = result.scalars().first().updated_at - updated = make_connector_document(search_space_id=db_search_space.id, source_markdown="## v2") + updated = make_connector_document( + search_space_id=db_search_space.id, source_markdown="## v2" + ) await service.prepare_for_indexing([updated]) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) updated_at_v2 = result.scalars().first().updated_at assert updated_at_v2 > updated_at_v1 @@ -273,9 +338,14 @@ async def test_same_content_from_different_source_is_skipped( assert len(result.scalars().all()) == 1 -@pytest.mark.usefixtures("patched_summarize_raises", "patched_embed_text", "patched_chunk_text") +@pytest.mark.usefixtures( + "patched_summarize_raises", "patched_embed_text", "patched_chunk_text" +) async def test_failed_document_with_unchanged_content_is_requeued( - db_session, db_search_space, make_connector_document, mocker, + db_session, + db_search_space, + make_connector_document, + mocker, ): """A FAILED document with unchanged content is re-queued as PENDING on the next run.""" doc = make_connector_document(search_space_id=db_search_space.id) @@ -286,8 +356,12 @@ async def test_failed_document_with_unchanged_content_is_requeued( document_id = prepared[0].id await service.index(prepared[0], doc, llm=mocker.Mock()) - result = await db_session.execute(select(Document).filter(Document.id == document_id)) - assert DocumentStatus.is_state(result.scalars().first().status, DocumentStatus.FAILED) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) + assert DocumentStatus.is_state( + result.scalars().first().status, DocumentStatus.FAILED + ) # Next run: same content, pipeline must re-queue the failed document results = await service.prepare_for_indexing([doc]) @@ -295,8 +369,12 @@ async def test_failed_document_with_unchanged_content_is_requeued( assert len(results) == 1 assert results[0].id == document_id - result = await db_session.execute(select(Document).filter(Document.id == document_id)) - assert DocumentStatus.is_state(result.scalars().first().status, DocumentStatus.PENDING) + result = await db_session.execute( + select(Document).filter(Document.id == document_id) + ) + assert DocumentStatus.is_state( + result.scalars().first().status, DocumentStatus.PENDING + ) async def test_title_and_content_change_updates_both_and_returns_document( @@ -323,16 +401,20 @@ async def test_title_and_content_change_updates_both_and_returns_document( assert len(results) == 1 assert results[0].id == original_id - result = await db_session.execute(select(Document).filter(Document.id == original_id)) + result = await db_session.execute( + select(Document).filter(Document.id == original_id) + ) reloaded = result.scalars().first() assert reloaded.title == "Updated Title" assert reloaded.source_markdown == "## v2" - async def test_one_bad_document_in_batch_does_not_prevent_others_from_being_persisted( - db_session, db_search_space, make_connector_document, monkeypatch, + db_session, + db_search_space, + make_connector_document, + monkeypatch, ): """ A per-document error during prepare_for_indexing must be isolated. @@ -374,4 +456,4 @@ async def test_one_bad_document_in_batch_does_not_prevent_others_from_being_pers result = await db_session.execute( select(Document).filter(Document.search_space_id == db_search_space.id) ) - assert len(result.scalars().all()) == 2 \ No newline at end of file + assert len(result.scalars().all()) == 2 diff --git a/surfsense_backend/tests/unit/indexing_pipeline/conftest.py b/surfsense_backend/tests/unit/indexing_pipeline/conftest.py index 2147cfa3f..11f84dce5 100644 --- a/surfsense_backend/tests/unit/indexing_pipeline/conftest.py +++ b/surfsense_backend/tests/unit/indexing_pipeline/conftest.py @@ -1,6 +1,7 @@ -import pytest from unittest.mock import AsyncMock, MagicMock +import pytest + @pytest.fixture def patched_summarizer_chain(monkeypatch): @@ -21,7 +22,9 @@ def patched_summarizer_chain(monkeypatch): def patched_chunker_instance(monkeypatch): mock = MagicMock() mock.chunk.return_value = [MagicMock(text="prose chunk")] - monkeypatch.setattr("app.indexing_pipeline.document_chunker.config.chunker_instance", mock) + monkeypatch.setattr( + "app.indexing_pipeline.document_chunker.config.chunker_instance", mock + ) return mock @@ -29,5 +32,7 @@ def patched_chunker_instance(monkeypatch): def patched_code_chunker_instance(monkeypatch): mock = MagicMock() mock.chunk.return_value = [MagicMock(text="code chunk")] - monkeypatch.setattr("app.indexing_pipeline.document_chunker.config.code_chunker_instance", mock) + monkeypatch.setattr( + "app.indexing_pipeline.document_chunker.config.code_chunker_instance", mock + ) return mock diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py b/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py index 6b7a47f51..fe536b066 100644 --- a/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py @@ -1,7 +1,10 @@ import pytest from app.db import DocumentType -from app.indexing_pipeline.document_hashing import compute_content_hash, compute_unique_identifier_hash +from app.indexing_pipeline.document_hashing import ( + compute_content_hash, + compute_unique_identifier_hash, +) pytestmark = pytest.mark.unit @@ -10,21 +13,31 @@ def test_different_unique_id_produces_different_hash(make_connector_document): """Two documents with different unique_ids produce different identifier hashes.""" doc_a = make_connector_document(unique_id="id-001") doc_b = make_connector_document(unique_id="id-002") - assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash(doc_b) + assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash( + doc_b + ) -def test_different_search_space_produces_different_identifier_hash(make_connector_document): +def test_different_search_space_produces_different_identifier_hash( + make_connector_document, +): """Same document in different search spaces produces different identifier hashes.""" doc_a = make_connector_document(search_space_id=1) doc_b = make_connector_document(search_space_id=2) - assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash(doc_b) + assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash( + doc_b + ) -def test_different_document_type_produces_different_identifier_hash(make_connector_document): +def test_different_document_type_produces_different_identifier_hash( + make_connector_document, +): """Same unique_id with different document types produces different identifier hashes.""" doc_a = make_connector_document(document_type=DocumentType.CLICKUP_CONNECTOR) doc_b = make_connector_document(document_type=DocumentType.NOTION_CONNECTOR) - assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash(doc_b) + assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash( + doc_b + ) def test_same_content_same_space_produces_same_content_hash(make_connector_document): @@ -34,7 +47,9 @@ def test_same_content_same_space_produces_same_content_hash(make_connector_docum assert compute_content_hash(doc_a) == compute_content_hash(doc_b) -def test_same_content_different_space_produces_different_content_hash(make_connector_document): +def test_same_content_different_space_produces_different_content_hash( + make_connector_document, +): """Identical content in different search spaces produces different content hashes.""" doc_a = make_connector_document(source_markdown="Hello world", search_space_id=1) doc_b = make_connector_document(source_markdown="Hello world", search_space_id=2) diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_document_summarizer.py b/surfsense_backend/tests/unit/indexing_pipeline/test_document_summarizer.py index a3a8ecfc2..eee32357f 100644 --- a/surfsense_backend/tests/unit/indexing_pipeline/test_document_summarizer.py +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_document_summarizer.py @@ -1,6 +1,7 @@ -import pytest from unittest.mock import MagicMock +import pytest + from app.indexing_pipeline.document_summarizer import summarize_document pytestmark = pytest.mark.unit @@ -38,5 +39,3 @@ async def test_with_metadata_omits_empty_fields_from_output(): assert "Alice" in result assert "description" not in result.lower() - -