test(backend): add E2E harness foundation (entrypoints, middleware, LLM/embedding fakes)

This commit is contained in:
Anish Sarkar 2026-05-06 17:17:42 +05:30
parent c720866a67
commit 58ba95fad2
9 changed files with 550 additions and 0 deletions

View file

@ -0,0 +1,8 @@
"""Strict fakes for third-party SDKs, used in E2E mode only.
Every fake here implements a __getattr__ that raises NotImplementedError
on any unknown surface. Combined with sys.modules-level hijacking in
run_backend.py / run_celery.py, this makes silent pass-through to the
real SDK impossible: a future production code path that introduces a
new SDK call site fails CI with a clear "add this to the fake" message.
"""

View file

@ -0,0 +1,79 @@
"""Deterministic embedding fakes for E2E.
Mirrors the existing `patched_embed_texts` fixture in
`surfsense_backend/tests/integration/conftest.py`:
MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts])
The dimension matches whatever `config.embedding_model_instance.dimension`
returns in the running process so the fakes are vector-compatible with
the documents.embedding pgvector column.
"""
from __future__ import annotations
import logging
from typing import Any
import numpy as np
from app.config import config
logger = logging.getLogger(__name__)
def _embedding_dim() -> int:
    """Look up the embedding dimension from the configured model, lazily per call.

    Resolved at call time (not import time) so the fakes track whatever
    embedding model the running process is configured with.
    """
    dim = config.embedding_model_instance.dimension
    return int(dim)
def fake_embed_text(text: str) -> np.ndarray:
    """Return the deterministic E2E embedding for one text: a constant-0.1 float32 vector."""
    dim = _embedding_dim()
    return np.full((dim,), 0.1, dtype=np.float32)
def fake_embed_texts(texts: list[str]) -> list[np.ndarray]:
    """Return one deterministic constant-0.1 vector per input text.

    An empty input yields an empty list without touching the embedding
    configuration at all.
    """
    if not texts:
        return []
    # Build the constant vector once, then hand each text its own copy so
    # callers that mutate one result cannot affect another.
    template = np.full(shape=(_embedding_dim(),), fill_value=0.1, dtype=np.float32)
    return [template.copy() for _ in texts]
def install(patches: list[Any]) -> None:
    """Install embedding patches at every binding site we know about.

    The caller owns the `patches` list: every started patcher is appended
    to it so the entrypoint could, in principle, stop them on shutdown.
    We intentionally never stop them — the process exits when the test
    server stops.

    Raises:
        RuntimeError: if any known binding site no longer exists, so a
            refactor cannot silently fall through to a real embedding model.
    """
    from unittest.mock import patch as _patch

    binding_sites = (
        # Source binding (where the real implementation lives)
        ("app.utils.document_converters.embed_text", fake_embed_text),
        ("app.utils.document_converters.embed_texts", fake_embed_texts),
        # Consumers that did `from app.utils.document_converters import embed_text/texts`
        ("app.indexing_pipeline.document_embedder.embed_text", fake_embed_text),
        ("app.indexing_pipeline.document_embedder.embed_texts", fake_embed_texts),
        # Pipeline service binding (the actual call site for indexing.index)
        ("app.indexing_pipeline.indexing_pipeline_service.embed_texts", fake_embed_texts),
    )
    for target, replacement in binding_sites:
        try:
            patcher = _patch(target, replacement)
            patcher.start()
        except (ModuleNotFoundError, AttributeError) as exc:
            # A moved binding must fail loudly — silent passthrough to a
            # real embedding model would be expensive and non-deterministic.
            raise RuntimeError(
                f"Could not patch embedding binding {target!r}: {exc!s}. "
                f"Update surfsense_backend/tests/e2e/fakes/embeddings.py "
                f"to point at the new binding site."
            ) from exc
        patches.append(patcher)
        logger.info("[fake-embeddings] patched %s", target)

View file

@ -0,0 +1,48 @@
"""Deterministic LLM fake for the E2E indexing pipeline.
The production indexing pipeline summarizes documents with:
summary_chain = SUMMARY_PROMPT_TEMPLATE | llm
summary_result = await summary_chain.ainvoke({"document": ...})
summary_content = summary_result.content
The `llm` parameter is supplied per-document by
`app.services.llm_service.get_user_long_context_llm`. We patch THAT
function to return a langchain-native FakeListChatModel so the rest of
the chain works unchanged. No real LLM provider package is touched.
Run-backend / run-celery use unittest.mock.patch.start() to install
this at every binding site (the source module + every consumer that
did `from app.services.llm_service import get_user_long_context_llm`
at module load time).
"""
from __future__ import annotations
import logging
from typing import Any
from langchain_core.language_models.fake_chat_models import FakeListChatModel
logger = logging.getLogger(__name__)
def _make_fake_llm() -> FakeListChatModel:
    """Build a fresh FakeListChatModel that yields one fixed, deterministic summary."""
    # FakeListChatModel cycles through `responses`; supplying a single entry
    # means every invocation returns the same string. The E2E_FAKE_SUMMARY
    # marker lets specs assert on the summary if they want, but the primary
    # indexing assertion is on the file content (chunked + stored separately
    # by the pipeline).
    canned = [
        "E2E_FAKE_SUMMARY: Indexed by Playwright E2E run with deterministic LLM stub."
    ]
    return FakeListChatModel(responses=canned)
async def fake_get_user_long_context_llm(*args: Any, **kwargs: Any) -> Any:
    """Drop-in replacement for app.services.llm_service.get_user_long_context_llm."""
    # All arguments are ignored: every caller receives the same stub model.
    logger.info("[fake-llm] returning FakeListChatModel for E2E indexing")
    stub = _make_fake_llm()
    return stub