test(backend): add E2E harness foundation (entrypoints, middleware, LLM/embedding fakes)

This commit is contained in:
Anish Sarkar 2026-05-06 17:17:42 +05:30
parent c720866a67
commit 58ba95fad2
9 changed files with 550 additions and 0 deletions

View file

@ -0,0 +1,8 @@
"""Strict fakes for third-party SDKs, used in E2E mode only.
Every fake here implements a __getattr__ that raises NotImplementedError
on any unknown surface. Combined with sys.modules-level hijacking in
run_backend.py / run_celery.py, this makes silent pass-through to the
real SDK impossible: a future production code path that introduces a
new SDK call site fails CI with a clear "add this to the fake" message.
"""

View file

@ -0,0 +1,79 @@
"""Deterministic embedding fakes for E2E.
Mirrors the existing `patched_embed_texts` fixture in
`surfsense_backend/tests/integration/conftest.py`:
MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts])
The dimension matches whatever `config.embedding_model_instance.dimension`
returns in the running process so the fakes are vector-compatible with
the documents.embedding pgvector column.
"""
from __future__ import annotations
import logging
from typing import Any
import numpy as np
from app.config import config
logger = logging.getLogger(__name__)
def _embedding_dim() -> int:
    """Look up the embedding dimension from the configured model, lazily per call.

    Resolved at call time (not import time) so the fakes track whatever
    embedding model the running process is configured with.
    """
    dim = config.embedding_model_instance.dimension
    return int(dim)
def fake_embed_text(text: str) -> np.ndarray:
    """Return the deterministic E2E embedding for one text: a constant-0.1 float32 vector."""
    dim = _embedding_dim()
    return np.full((dim,), 0.1, dtype=np.float32)
def fake_embed_texts(texts: list[str]) -> list[np.ndarray]:
    """Return one deterministic constant-0.1 vector per input text.

    An empty input yields an empty list without touching the embedding
    configuration at all.
    """
    if not texts:
        return []
    # Build the constant vector once, then hand each text its own copy so
    # callers that mutate one result cannot affect another.
    template = np.full(shape=(_embedding_dim(),), fill_value=0.1, dtype=np.float32)
    return [template.copy() for _ in texts]
def install(patches: list[Any]) -> None:
    """Install embedding patches at every binding site we know about.

    The caller owns the `patches` list: every started patcher is appended
    to it so the entrypoint could, in principle, stop them on shutdown.
    We intentionally never stop them — the process exits when the test
    server stops.

    Raises:
        RuntimeError: if any known binding site no longer exists, so a
            refactor cannot silently fall through to a real embedding model.
    """
    from unittest.mock import patch as _patch

    binding_sites = (
        # Source binding (where the real implementation lives)
        ("app.utils.document_converters.embed_text", fake_embed_text),
        ("app.utils.document_converters.embed_texts", fake_embed_texts),
        # Consumers that did `from app.utils.document_converters import embed_text/texts`
        ("app.indexing_pipeline.document_embedder.embed_text", fake_embed_text),
        ("app.indexing_pipeline.document_embedder.embed_texts", fake_embed_texts),
        # Pipeline service binding (the actual call site for indexing.index)
        ("app.indexing_pipeline.indexing_pipeline_service.embed_texts", fake_embed_texts),
    )
    for target, replacement in binding_sites:
        try:
            patcher = _patch(target, replacement)
            patcher.start()
        except (ModuleNotFoundError, AttributeError) as exc:
            # A moved binding must fail loudly — silent passthrough to a
            # real embedding model would be expensive and non-deterministic.
            raise RuntimeError(
                f"Could not patch embedding binding {target!r}: {exc!s}. "
                f"Update surfsense_backend/tests/e2e/fakes/embeddings.py "
                f"to point at the new binding site."
            ) from exc
        patches.append(patcher)
        logger.info("[fake-embeddings] patched %s", target)

View file

@ -0,0 +1,48 @@
"""Deterministic LLM fake for the E2E indexing pipeline.
The production indexing pipeline summarizes documents with:
summary_chain = SUMMARY_PROMPT_TEMPLATE | llm
summary_result = await summary_chain.ainvoke({"document": ...})
summary_content = summary_result.content
The `llm` parameter is supplied per-document by
`app.services.llm_service.get_user_long_context_llm`. We patch THAT
function to return a langchain-native FakeListChatModel so the rest of
the chain works unchanged. No real LLM provider package is touched.
Run-backend / run-celery use unittest.mock.patch.start() to install
this at every binding site (the source module + every consumer that
did `from app.services.llm_service import get_user_long_context_llm`
at module load time).
"""
from __future__ import annotations
import logging
from typing import Any
from langchain_core.language_models.fake_chat_models import FakeListChatModel
logger = logging.getLogger(__name__)
def _make_fake_llm() -> FakeListChatModel:
    """Build a fresh FakeListChatModel that yields one fixed, deterministic summary."""
    # FakeListChatModel cycles through `responses`; supplying a single entry
    # means every invocation returns the same string. The E2E_FAKE_SUMMARY
    # marker lets specs assert on the summary if they want, but the primary
    # indexing assertion is on the file content (chunked + stored separately
    # by the pipeline).
    canned = [
        "E2E_FAKE_SUMMARY: Indexed by Playwright E2E run with deterministic LLM stub."
    ]
    return FakeListChatModel(responses=canned)
async def fake_get_user_long_context_llm(*args: Any, **kwargs: Any) -> Any:
    """Drop-in replacement for app.services.llm_service.get_user_long_context_llm."""
    # All arguments are ignored: every caller receives the same stub model.
    logger.info("[fake-llm] returning FakeListChatModel for E2E indexing")
    stub = _make_fake_llm()
    return stub