SurfSense/surfsense_backend/tests/e2e/run_backend.py

249 lines
9.6 KiB
Python
Raw Normal View History

"""E2E backend entrypoint.
Hijacks third-party SDKs at sys.modules level BEFORE any production
code is imported, then starts the same FastAPI app + uvicorn that
`main.py` would run.
Production code is byte-identical with or without this file:
- `python main.py` is the production entrypoint (unchanged).
- `python tests/e2e/run_backend.py` is the test entrypoint, never imported by production.
- `surfsense_backend/.dockerignore` excludes `tests/`, so this file
physically does not exist in the production Docker image.
Defense in depth (see Composio Drive E2E Phase 1 plan):
1. sys.modules hijack here (Composio, Notion).
2. Strict __getattr__ inside fakes (NotImplementedError on unknown surface).
3. Network deny-list set in CI env (HTTPS_PROXY=http://127.0.0.1:1
plus sentinel API keys) so any leaked outbound HTTP fails loudly.
Usage:
cd surfsense_backend
uv run python tests/e2e/run_backend.py
"""
from __future__ import annotations
import logging
import os
import sys
# ---------------------------------------------------------------------------
# 1) Hijack sys.modules BEFORE any production import.
#    Production: composio_service.py:11 does `from composio import Composio`.
#    Once the fake is registered under that name, the import resolves to our
#    strict fake. `notion_client` and `notion_client.errors` are replaced the
#    same way.
# ---------------------------------------------------------------------------
# Put the surfsense_backend root on sys.path so that
# `import tests.e2e.fakes...` works regardless of how the entrypoint is
# invoked (uv run python tests/e2e/run_backend.py from repo root or from
# surfsense_backend/).
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
_BACKEND_ROOT = os.path.abspath(os.path.join(_THIS_DIR, os.pardir, os.pardir))
if _BACKEND_ROOT not in sys.path:
    sys.path.insert(0, _BACKEND_ROOT)

import tests.e2e.fakes.composio_module as _fake_composio  # noqa: E402
import tests.e2e.fakes.notion_module as _fake_notion  # noqa: E402

# Register the fakes under the real SDK names; insertion order matches the
# order of the dict literal.
sys.modules.update(
    {
        "composio": _fake_composio,
        "notion_client": _fake_notion,
        "notion_client.errors": _fake_notion.errors,
    }
)
# ---------------------------------------------------------------------------
# 2) Standard logging + dotenv so the rest of the app behaves like main.py.
# ---------------------------------------------------------------------------
from dotenv import load_dotenv  # noqa: E402

load_dotenv()

# Placeholder OAuth settings for the connectors exercised in E2E. They are
# applied with setdefault, so a value already present in the environment
# (including anything load_dotenv() just loaded) is left untouched.
_E2E_ENV_DEFAULTS = {
    "ATLASSIAN_CLIENT_ID": "fake-atlassian-client-id",
    "ATLASSIAN_CLIENT_SECRET": "fake-atlassian-client-secret",
    "CONFLUENCE_REDIRECT_URI": (
        "http://localhost:8000/api/v1/auth/confluence/connector/callback"
    ),
    "NOTION_CLIENT_ID": "fake-notion-client-id",
    "NOTION_CLIENT_SECRET": "fake-notion-client-secret",
    "NOTION_REDIRECT_URI": (
        "http://localhost:8000/api/v1/auth/notion/connector/callback"
    ),
    "MICROSOFT_CLIENT_ID": "fake-microsoft-client-id",
    "MICROSOFT_CLIENT_SECRET": "fake-microsoft-client-secret",
    "ONEDRIVE_REDIRECT_URI": (
        "http://localhost:8000/api/v1/auth/onedrive/connector/callback"
    ),
    "DROPBOX_APP_KEY": "fake-dropbox-app-key",
    "DROPBOX_APP_SECRET": "fake-dropbox-app-secret",
    "DROPBOX_REDIRECT_URI": (
        "http://localhost:8000/api/v1/auth/dropbox/connector/callback"
    ),
}
for _env_name, _env_value in _E2E_ENV_DEFAULTS.items():
    os.environ.setdefault(_env_name, _env_value)

# Unlike the defaults above, the Slack credentials are assigned
# unconditionally and replace any value that was already set.
os.environ.update(
    {
        "SLACK_CLIENT_ID": "fake-slack-mcp-client-id",
        "SLACK_CLIENT_SECRET": "fake-slack-mcp-client-secret",
    }
)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger("surfsense.e2e.backend")

# Loud banner on every start so a misrouted deployment is obvious in the logs.
logger.warning(
    "*** SURFSENSE E2E BACKEND ENTRYPOINT — fake Composio + LLM + embeddings, "
    "this MUST NOT be reachable in production. ***"
)
# ---------------------------------------------------------------------------
# 3) Now import the production app. Every module in app.* loads here,
# creating its bindings (some of which we will patch in step 4).
# The `from app.app import app` import sits in the import group below.
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# 4) Patch LLM + embedding bindings at every consumer site.
# Composio is already covered by the sys.modules hijack in step 1.
# ---------------------------------------------------------------------------
from unittest.mock import patch # noqa: E402
from app.app import app # noqa: E402
from tests.e2e.fakes import ( # noqa: E402
clickup_module as _fake_clickup_module,
confluence_indexer as _fake_confluence_indexer,
confluence_oauth as _fake_confluence_oauth,
2026-05-08 12:27:45 +05:30
dropbox_api as _fake_dropbox_api,
embeddings as _fake_embeddings,
2026-05-08 00:15:08 +05:30
jira_module as _fake_jira_module,
linear_module as _fake_linear_module,
2026-05-08 00:15:08 +05:30
mcp_oauth_runtime as _fake_mcp_oauth_runtime,
mcp_runtime as _fake_mcp_runtime,
native_google as _fake_native_google,
2026-05-07 22:21:52 +05:30
notion_module as _fake_notion_module,
onedrive_graph as _fake_onedrive_graph,
slack_module as _fake_slack_module,
)
from tests.e2e.fakes.chat_llm import ( # noqa: E402
fake_create_chat_litellm_from_agent_config,
fake_create_chat_litellm_from_config,
)
from tests.e2e.fakes.llm import fake_get_user_long_context_llm # noqa: E402
# Patchers that have been started by this entrypoint. The same list is handed
# to each fake module's install() further down the file.
_active_patches: list = []


def _patch_llm_bindings() -> None:
    """Swap the real LLM factories for E2E fakes at each known import site.

    Each dotted path listed below is patched with ``unittest.mock.patch``.
    A patcher that starts successfully is appended to ``_active_patches``
    and logged at INFO. A path that raises ``ModuleNotFoundError`` or
    ``AttributeError`` is logged as a warning and skipped, so startup
    continues.
    """
    long_context_sites = (
        "app.services.llm_service.get_user_long_context_llm",
        "app.tasks.connector_indexers.confluence_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.google_drive_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.google_gmail_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.notion_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.onedrive_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.dropbox_indexer.get_user_long_context_llm",
        "app.tasks.connector_indexers.local_folder_indexer.get_user_long_context_llm",
        "app.tasks.document_processors._save.get_user_long_context_llm",
        "app.tasks.document_processors.markdown_processor.get_user_long_context_llm",
    )
    chat_factory_sites = (
        (
            "app.agents.new_chat.llm_config.create_chat_litellm_from_agent_config",
            fake_create_chat_litellm_from_agent_config,
        ),
        (
            "app.agents.new_chat.llm_config.create_chat_litellm_from_config",
            fake_create_chat_litellm_from_config,
        ),
        (
            "app.tasks.chat.stream_new_chat.create_chat_litellm_from_agent_config",
            fake_create_chat_litellm_from_agent_config,
        ),
        (
            "app.tasks.chat.stream_new_chat.create_chat_litellm_from_config",
            fake_create_chat_litellm_from_config,
        ),
    )

    def _start(site: str, fake, patched_fmt: str, failed_fmt: str) -> None:
        """Start one patcher for *site*, logging success or a skip warning."""
        try:
            patcher = patch(site, fake)
            patcher.start()
            _active_patches.append(patcher)
            logger.info(patched_fmt, site)
        except (ModuleNotFoundError, AttributeError) as exc:
            logger.warning(failed_fmt, site, exc)

    # Long-context LLM getter: one rebinding per module that refers to it.
    # Some indexers may not be loaded in every env, hence the warn-and-skip
    # handling. The warning spells out the consequence so a missed binding
    # is not silently let through.
    for site in long_context_sites:
        _start(
            site,
            fake_get_user_long_context_llm,
            "[fake-llm] patched %s",
            "[fake-llm] could not patch %s: %s. If production code "
            "uses this path in E2E it will hit the real provider; "
            "update tests/e2e/run_backend.py.",
        )

    # Chat LLM factories: each site gets its matching fake factory.
    for site, fake in chat_factory_sites:
        _start(
            site,
            fake,
            "[fake-chat-llm] patched %s",
            "[fake-chat-llm] could not patch %s: %s.",
        )
# LLM factories first, then every fake module in a fixed order. Each fake's
# install() is given the shared patcher list (presumably so it can record
# the patchers it starts; confirm against tests/e2e/fakes).
_patch_llm_bindings()
for _fake in (
    _fake_embeddings,
    _fake_confluence_oauth,
    _fake_confluence_indexer,
    _fake_native_google,
    _fake_onedrive_graph,
    _fake_dropbox_api,
    _fake_notion_module,
    _fake_linear_module,
    _fake_jira_module,
    _fake_clickup_module,
    _fake_mcp_runtime,
    _fake_mcp_oauth_runtime,
    _fake_slack_module,
):
    _fake.install(_active_patches)
# ---------------------------------------------------------------------------
# 5) Mount test-only middleware. Production never reaches this code.
# ---------------------------------------------------------------------------
# Import the test-only ScenarioMiddleware and register it on the FastAPI
# `app` object imported earlier in this file.
from tests.e2e.middleware.scenario import ScenarioMiddleware # noqa: E402
app.add_middleware(ScenarioMiddleware)
# ---------------------------------------------------------------------------
# 6) Start uvicorn, mirroring main.py's behaviour.
# ---------------------------------------------------------------------------
import asyncio # noqa: E402
import uvicorn # noqa: E402
def _main() -> None:
    """Serve the patched app with uvicorn.

    Host, port and log level are read from ``UVICORN_HOST``,
    ``UVICORN_PORT`` and ``UVICORN_LOG_LEVEL``, defaulting to
    ``0.0.0.0``, ``8000`` and ``info``. Auto-reload is always off.
    """
    # On Windows, switch asyncio to the selector event loop policy before
    # the server is created.
    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    server = uvicorn.Server(
        uvicorn.Config(
            app=app,
            host=os.getenv("UVICORN_HOST", "0.0.0.0"),
            port=int(os.getenv("UVICORN_PORT", "8000")),
            log_level=os.getenv("UVICORN_LOG_LEVEL", "info"),
            reload=False,
        )
    )
    server.run()


if __name__ == "__main__":
    _main()