test(backend): add E2E harness foundation (entrypoints, middleware, LLM/embedding fakes)

This commit is contained in:
Anish Sarkar 2026-05-06 17:17:42 +05:30
parent c720866a67
commit 58ba95fad2
9 changed files with 550 additions and 0 deletions

View file

@ -0,0 +1,69 @@
# Backend E2E Test Harness
Strict fakes + alternative entrypoints used **only** by Playwright E2E.
Excluded from the production Docker image via `.dockerignore`.
## Files
| Path | Role |
| -------------------------------- | ------------------------------------------------------------------------------- |
| `run_backend.py` | FastAPI entrypoint that hijacks `sys.modules` before importing `app.app:app` |
| `run_celery.py` | Celery worker entrypoint with the same hijack + patch logic |
| `middleware/scenario.py` | `X-E2E-Scenario` header → ContextVar (read by fakes) |
| `fakes/composio_module.py` | Strict drop-in for the `composio` package; raises on unknown surface |
| `fakes/llm.py` | `fake_get_user_long_context_llm` returning a `FakeListChatModel` |
| `fakes/embeddings.py` | Deterministic 0.1-vector `embed_text` / `embed_texts` |
| `fakes/fixtures/drive_files.json` | Canned Drive listings + file contents (incl. canary tokens) |
## Why a sys.modules hijack?
Production code does `from composio import Composio` at module load
time. By the time the FastAPI app object exists, that binding has
already been resolved. The hijack runs **before** any `app.*` import,
so the binding resolves to our strict fake. No production source
changes; fakes are physically excluded from production images.
Belt + suspenders + no internet: the strict `__getattr__` in every
fake raises `NotImplementedError` if a future production code path
introduces a new SDK call. CI also sets `HTTPS_PROXY=http://127.0.0.1:1`
plus sentinel API keys so any leaked outbound HTTP fails immediately.
## Adding a new fake
1. Create `fakes/<sdk>_module.py` modelled on `composio_module.py`.
2. In `run_backend.py` and `run_celery.py`, register
`sys.modules["<sdk>"] = _fake_<sdk>` before the `from app.app import app`
line.
3. If the new fake needs scenario branching, read from
`tests.e2e.middleware.scenario.current_scenario()`.
## Reused by backend integration tests
The strict fakes are not only for Playwright. Backend route integration
tests can import the same fake before importing `app.app`, so Composio
route tests exercise production route code without touching the real
SDK:
```python
from tests.e2e.fakes import composio_module as _fake_composio
sys.modules["composio"] = _fake_composio
from app.app import app
```
See `surfsense_backend/tests/integration/composio/conftest.py` for the
current pattern.
## Running locally
```bash
cd surfsense_backend
uv run python tests/e2e/run_backend.py
# in a second shell:
uv run python tests/e2e/run_celery.py
```
Then in `surfsense_web`:
```bash
pnpm test:e2e
```

View file

@ -0,0 +1,7 @@
"""E2E test harness root.

This package is loaded only by the test entrypoints
(`tests/e2e/run_backend.py`, `tests/e2e/run_celery.py`). It is excluded
from the production Docker image via `surfsense_backend/.dockerignore`,
so production builds never contain this code.
"""

View file

@ -0,0 +1,8 @@
"""Strict fakes for third-party SDKs, used in E2E mode only.

Every fake here implements a ``__getattr__`` that raises NotImplementedError
on any unknown surface. Combined with sys.modules-level hijacking in
run_backend.py / run_celery.py, this makes silent pass-through to the
real SDK impossible: a future production code path that introduces a
new SDK call site fails CI with a clear "add this to the fake" message.
"""

View file

@ -0,0 +1,79 @@
"""Deterministic embedding fakes for E2E.

Mirrors the existing `patched_embed_texts` fixture in
`surfsense_backend/tests/integration/conftest.py`:

    MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts])

The dimension matches whatever `config.embedding_model_instance.dimension`
returns in the running process so the fakes are vector-compatible with
the documents.embedding pgvector column.
"""

from __future__ import annotations

import logging
from typing import Any

import numpy as np

from app.config import config

logger = logging.getLogger(__name__)
def _embedding_dim() -> int:
    """Resolve the embedding dimension lazily, per call.

    Read from `config.embedding_model_instance` at call time (not import
    time) so the fakes work with whichever embedding model the running
    process configured.
    """
    dimension = config.embedding_model_instance.dimension
    return int(dimension)
def fake_embed_text(text: str) -> np.ndarray:
    """Deterministic single-text embedding: every component is 0.1."""
    dim = _embedding_dim()
    return np.full((dim,), 0.1, dtype=np.float32)
def fake_embed_texts(texts: list[str]) -> list[np.ndarray]:
    """Deterministic batch embedding: one 0.1-filled vector per input text."""
    if not texts:
        return []
    vector_shape = (_embedding_dim(),)
    return [np.full(vector_shape, 0.1, dtype=np.float32) for _ in texts]
def install(patches: list[Any]) -> None:
    """Install embedding patches at every binding site we know about.

    The caller supplies `patches`, a list that the entrypoint tracks so
    the started patch objects stay alive. In principle they could be
    stopped on shutdown — we intentionally never stop them, because the
    process exits when the test server stops.

    Raises:
        RuntimeError: if any known binding site cannot be patched.
            Failing loudly is deliberate: silent passthrough to a real
            embedding model would be expensive and non-deterministic.
    """
    from unittest.mock import patch as _patch

    bindings = {
        # Source binding (where the real implementation lives)
        "app.utils.document_converters.embed_text": fake_embed_text,
        "app.utils.document_converters.embed_texts": fake_embed_texts,
        # Consumers that did `from app.utils.document_converters import embed_text/texts`
        "app.indexing_pipeline.document_embedder.embed_text": fake_embed_text,
        "app.indexing_pipeline.document_embedder.embed_texts": fake_embed_texts,
        # Pipeline service binding (the actual call site for indexing.index)
        "app.indexing_pipeline.indexing_pipeline_service.embed_texts": fake_embed_texts,
    }
    for target, replacement in bindings.items():
        try:
            patcher = _patch(target, replacement)
            patcher.start()
        except (ModuleNotFoundError, AttributeError) as exc:
            # A refactor moved this binding: fail loudly so the harness
            # is updated instead of silently hitting a real model.
            raise RuntimeError(
                f"Could not patch embedding binding {target!r}: {exc!s}. "
                f"Update surfsense_backend/tests/e2e/fakes/embeddings.py "
                f"to point at the new binding site."
            ) from exc
        patches.append(patcher)
        logger.info("[fake-embeddings] patched %s", target)

View file

@ -0,0 +1,48 @@
"""Deterministic LLM fake for the E2E indexing pipeline.

The production indexing pipeline summarizes documents with:

    summary_chain = SUMMARY_PROMPT_TEMPLATE | llm
    summary_result = await summary_chain.ainvoke({"document": ...})
    summary_content = summary_result.content

The `llm` parameter is supplied per-document by
`app.services.llm_service.get_user_long_context_llm`. We patch THAT
function to return a langchain-native FakeListChatModel so the rest of
the chain works unchanged. No real LLM provider package is touched.

run_backend.py / run_celery.py use unittest.mock.patch.start() to install
this at every binding site (the source module + every consumer that
did `from app.services.llm_service import get_user_long_context_llm`
at module load time).
"""

from __future__ import annotations

import logging
from typing import Any

from langchain_core.language_models.fake_chat_models import FakeListChatModel

logger = logging.getLogger(__name__)
def _make_fake_llm() -> FakeListChatModel:
    """Build a fresh FakeListChatModel yielding one deterministic summary.

    FakeListChatModel cycles through `responses` for each invocation, so a
    single entry means every summary is identical. The content carries a
    marker that specs CAN assert on if they want, but the primary indexing
    assertion is on the file content (chunked + stored separately by the
    pipeline).
    """
    deterministic_summary = (
        "E2E_FAKE_SUMMARY: Indexed by Playwright E2E run with deterministic LLM stub."
    )
    return FakeListChatModel(responses=[deterministic_summary])
async def fake_get_user_long_context_llm(*args: Any, **kwargs: Any) -> Any:
    """Drop-in replacement for app.services.llm_service.get_user_long_context_llm.

    Accepts (and ignores) whatever the production signature passes; always
    returns a fresh deterministic FakeListChatModel.
    """
    logger.info("[fake-llm] returning FakeListChatModel for E2E indexing")
    model = _make_fake_llm()
    return model

View file

@ -0,0 +1,4 @@
"""Test-only middleware. Mounted on the FastAPI `app` object inside
`tests/e2e/run_backend.py`, never registered by production startup
(`python main.py`).
"""

View file

@ -0,0 +1,54 @@
"""X-E2E-Scenario middleware.

Reads the X-E2E-Scenario request header and pipes the value into a
ContextVar that the strict fakes consult to switch between happy-path
and error scenarios on a per-request basis.

Mounted by tests/e2e/run_backend.py only. Production never adds this
middleware, so production never reads the header.

Supported scenarios:
- "happy" (default): everything succeeds with deterministic fixtures.
- "denied": Composio.connected_accounts.initiate returns a redirect URL
  pointing at our callback with ?error=access_denied.
- "auth_expired": GOOGLEDRIVE_LIST_FILES returns an authentication
  failure that the route translates to connector.config.auth_expired.
- "duplicate": no special fake behavior; the duplicate path is exercised
  by running the OAuth flow twice with the same toolkit.
"""

from __future__ import annotations

from contextvars import ContextVar

from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
from starlette.types import ASGIApp
_scenario: ContextVar[str] = ContextVar("e2e_scenario", default="happy")
def current_scenario() -> str:
"""Return the active E2E scenario for the current request context."""
return _scenario.get()
class ScenarioMiddleware(BaseHTTPMiddleware):
    """Exposes the X-E2E-Scenario header through a ContextVar.

    The value is also stashed on `request.state` so route handlers can
    branch on it directly if they ever need to (Composio routes do not).
    Missing header falls back to "happy".
    """

    def __init__(self, app: ASGIApp) -> None:
        super().__init__(app)

    async def dispatch(self, request: Request, call_next) -> Response:
        scenario = request.headers.get("X-E2E-Scenario", "happy")
        request.state.e2e_scenario = scenario
        token = _scenario.set(scenario)
        try:
            return await call_next(request)
        finally:
            # Always restore, even on handler errors, so scenarios never
            # leak between requests sharing a context.
            _scenario.reset(token)

View file

@ -0,0 +1,156 @@
"""E2E backend entrypoint.

Hijacks third-party SDKs at sys.modules level BEFORE any production
code is imported, then starts the same FastAPI app + uvicorn that
`main.py` would run.

Production code is byte-identical with or without this file:
- `python main.py` is the production entrypoint (unchanged).
- `python tests/e2e/run_backend.py` is the test entrypoint, never imported by production.
- `surfsense_backend/.dockerignore` excludes `tests/`, so this file
  physically does not exist in the production Docker image.

Defense in depth (see Composio Drive E2E Phase 1 plan):
1. sys.modules hijack here (Composio).
2. Strict __getattr__ inside fakes (NotImplementedError on unknown surface).
3. Network deny-list set in CI env (HTTPS_PROXY=http://127.0.0.1:1
   plus sentinel API keys) so any leaked outbound HTTP fails loudly.

Usage:
    cd surfsense_backend
    uv run python tests/e2e/run_backend.py
"""

from __future__ import annotations

import logging
import os
import sys

# ---------------------------------------------------------------------------
# 1) Hijack sys.modules BEFORE any production import.
#    Production: composio_service.py:11 does `from composio import Composio`.
#    With this hijack in place, that import resolves to our strict fake.
# ---------------------------------------------------------------------------
# Make the surfsense_backend root importable as a top-level package so
# `import tests.e2e.fakes...` works regardless of how the entrypoint is
# invoked (uv run python tests/e2e/run_backend.py from repo root or from
# surfsense_backend/).
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
_BACKEND_ROOT = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
if _BACKEND_ROOT not in sys.path:
    sys.path.insert(0, _BACKEND_ROOT)

import tests.e2e.fakes.composio_module as _fake_composio  # noqa: E402

sys.modules["composio"] = _fake_composio

# ---------------------------------------------------------------------------
# 2) Standard logging + dotenv so the rest of the app behaves like main.py.
# ---------------------------------------------------------------------------
from dotenv import load_dotenv  # noqa: E402

load_dotenv()
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger("surfsense.e2e.backend")
# Loud banner so a misdeployed harness is obvious in logs.
logger.warning(
    "*** SURFSENSE E2E BACKEND ENTRYPOINT — fake Composio + LLM + embeddings, "
    "this MUST NOT be reachable in production. ***"
)

# ---------------------------------------------------------------------------
# 3) Now import the production app. Every module in app.* loads here,
#    creating their bindings (some of which we will patch in step 4).
# ---------------------------------------------------------------------------
# 4) Patch LLM + embedding bindings at every consumer site.
#    Composio is already covered by the sys.modules hijack in step 1.
# ---------------------------------------------------------------------------
from unittest.mock import patch  # noqa: E402

from app.app import app  # noqa: E402
from tests.e2e.fakes import embeddings as _fake_embeddings  # noqa: E402
from tests.e2e.fakes.llm import fake_get_user_long_context_llm  # noqa: E402

# Started patch objects live here for the lifetime of the process; they
# are deliberately never stopped (process exit cleans up).
_active_patches: list = []
def _patch_llm_bindings() -> None:
    """Replace get_user_long_context_llm at every known binding site.

    Patch failures are logged (not fatal): some indexers may not be
    loaded in every environment, but a known binding must never be let
    through silently.
    """
    known_bindings = (
        "app.services.llm_service.get_user_long_context_llm",
        "app.tasks.connector_indexers.google_drive_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.google_gmail_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.local_folder_indexer.get_user_long_context_llm",
        "app.tasks.document_processors.file_processors.get_user_long_context_llm",
    )
    for target in known_bindings:
        try:
            patcher = patch(target, fake_get_user_long_context_llm)
            patcher.start()
        except (ModuleNotFoundError, AttributeError) as exc:
            logger.warning(
                "[fake-llm] could not patch %s: %s. If production code "
                "uses this path in E2E it will hit the real provider; "
                "update tests/e2e/run_backend.py.",
                target,
                exc,
            )
        else:
            _active_patches.append(patcher)
            logger.info("[fake-llm] patched %s", target)
# Apply the LLM + embedding patches now, before any request is served.
_patch_llm_bindings()
_fake_embeddings.install(_active_patches)

# ---------------------------------------------------------------------------
# 5) Mount test-only middleware. Production never reaches this code.
# ---------------------------------------------------------------------------
from tests.e2e.middleware.scenario import ScenarioMiddleware  # noqa: E402

app.add_middleware(ScenarioMiddleware)

# ---------------------------------------------------------------------------
# 6) Start uvicorn, mirroring main.py's behaviour.
# ---------------------------------------------------------------------------
import asyncio
import uvicorn
def _main() -> None:
    """Run uvicorn against the patched app, mirroring main.py's behaviour.

    Host/port/log level come from UVICORN_* env vars with the same
    defaults main.py would use.
    """
    if sys.platform == "win32":
        # NOTE(review): selector loop policy on Windows presumably matches
        # what main.py does for dependency compatibility — confirm.
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    server_config = uvicorn.Config(
        app=app,
        host=os.getenv("UVICORN_HOST", "0.0.0.0"),
        port=int(os.getenv("UVICORN_PORT", "8000")),
        log_level=os.getenv("UVICORN_LOG_LEVEL", "info"),
        reload=False,
    )
    uvicorn.Server(server_config).run()


if __name__ == "__main__":
    _main()

View file

@ -0,0 +1,125 @@
"""E2E Celery worker entrypoint.

Same sys.modules hijack + LLM/embedding patches as run_backend.py,
applied before importing the production celery_app. Celery workers
run in a separate Python interpreter, so the patches must be applied
here too — they would NOT carry over from the FastAPI process.

Production is unaffected: celery_worker.py at the repo root is the
production entrypoint and never imports this file.

Usage:
    cd surfsense_backend
    uv run python tests/e2e/run_celery.py
"""

from __future__ import annotations

import logging
import os
import sys

# Make the surfsense_backend root importable as a top-level package,
# mirroring tests/e2e/run_backend.py.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
_BACKEND_ROOT = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
if _BACKEND_ROOT not in sys.path:
    sys.path.insert(0, _BACKEND_ROOT)

# ---------------------------------------------------------------------------
# 1) Hijack sys.modules BEFORE production celery imports anything.
# ---------------------------------------------------------------------------
import tests.e2e.fakes.composio_module as _fake_composio  # noqa: E402

sys.modules["composio"] = _fake_composio

# ---------------------------------------------------------------------------
# 2) Logging + dotenv.
# ---------------------------------------------------------------------------
from dotenv import load_dotenv  # noqa: E402

load_dotenv()
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger("surfsense.e2e.celery")
# Loud banner so a misdeployed harness is obvious in logs.
logger.warning(
    "*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings, "
    "this MUST NOT be reachable in production. ***"
)

# ---------------------------------------------------------------------------
# 3) Import the production celery_app. All task modules load here.
# ---------------------------------------------------------------------------
from app.celery_app import celery_app  # noqa: E402

# ---------------------------------------------------------------------------
# 4) Patch LLM + embedding bindings inside the worker process.
# ---------------------------------------------------------------------------
from unittest.mock import patch  # noqa: E402

from tests.e2e.fakes import embeddings as _fake_embeddings  # noqa: E402
from tests.e2e.fakes.llm import fake_get_user_long_context_llm  # noqa: E402

# Started patch objects live here for the lifetime of the worker process;
# they are deliberately never stopped (process exit cleans up).
_active_patches: list = []
def _patch_llm_bindings() -> None:
    """Replace get_user_long_context_llm at every known binding site.

    Failures are logged rather than fatal: some indexers may not be
    loaded in every environment.
    """
    known_bindings = (
        "app.services.llm_service.get_user_long_context_llm",
        "app.tasks.connector_indexers.google_drive_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.google_gmail_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.local_folder_indexer.get_user_long_context_llm",
        "app.tasks.document_processors.file_processors.get_user_long_context_llm",
    )
    for target in known_bindings:
        try:
            patcher = patch(target, fake_get_user_long_context_llm)
            patcher.start()
        except (ModuleNotFoundError, AttributeError) as exc:
            logger.warning(
                "[fake-llm] could not patch %s in celery worker: %s.",
                target,
                exc,
            )
        else:
            _active_patches.append(patcher)
            logger.info("[fake-llm] patched %s in celery worker", target)
# Apply the patches now, before the worker starts consuming tasks.
_patch_llm_bindings()
_fake_embeddings.install(_active_patches)

# ---------------------------------------------------------------------------
# 5) Start the worker.
# ---------------------------------------------------------------------------
def _main() -> None:
    """Start a Celery worker on the default + connectors queues.

    Default queues mirror production (default queue plus the connectors
    queue so Drive indexing tasks are picked up).
    """
    default_queue = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
    worker_argv = [
        "worker",
        "--loglevel=info",
        f"--queues={default_queue},{default_queue}.connectors",
        "--concurrency=2",
        "--without-gossip",
        "--without-mingle",
    ]
    celery_app.worker_main(argv=worker_argv)


if __name__ == "__main__":
    _main()