mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-17 18:35:19 +02:00
chore: modify E2E test configuration by updating global LLM model IDs to negative values for improved test isolation
This commit is contained in:
parent
0b9fc00663
commit
bed2041a1b
2 changed files with 225 additions and 178 deletions
|
|
@ -38,7 +38,7 @@ router_settings:
|
|||
cooldown_time: 1
|
||||
|
||||
global_llm_configs:
|
||||
- id: 1001
|
||||
- id: -9001
|
||||
name: "E2E Fake Auto Model (premium)"
|
||||
billing_tier: "premium"
|
||||
anonymous_enabled: false
|
||||
|
|
@ -54,7 +54,7 @@ global_llm_configs:
|
|||
litellm_params:
|
||||
model: "openai/fake-e2e-model-premium"
|
||||
|
||||
- id: 1002
|
||||
- id: -9002
|
||||
name: "E2E Fake Auto Model (free)"
|
||||
billing_tier: "free"
|
||||
anonymous_enabled: false
|
||||
|
|
|
|||
|
|
@ -25,168 +25,168 @@ if _BACKEND_ROOT not in sys.path:
|
|||
sys.path.insert(0, _BACKEND_ROOT)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1) Hijack sys.modules BEFORE production celery imports anything.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import tests.e2e.fakes.composio_module as _fake_composio # noqa: E402
|
||||
import tests.e2e.fakes.notion_module as _fake_notion # noqa: E402
|
||||
|
||||
sys.modules["composio"] = _fake_composio
|
||||
sys.modules["notion_client"] = _fake_notion
|
||||
sys.modules["notion_client.errors"] = _fake_notion.errors
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2) Logging + dotenv.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
from dotenv import load_dotenv # noqa: E402
|
||||
|
||||
load_dotenv()
|
||||
|
||||
os.environ.setdefault(
|
||||
"DATABASE_URL",
|
||||
"postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
|
||||
)
|
||||
os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
|
||||
os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production")
|
||||
os.environ.setdefault("AUTH_TYPE", "LOCAL")
|
||||
os.environ.setdefault("REGISTRATION_ENABLED", "TRUE")
|
||||
os.environ.setdefault("ETL_SERVICE", "DOCLING")
|
||||
os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
||||
os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000")
|
||||
|
||||
# Sentinel keys — fakes never read them; turns leaked real calls into 401s.
|
||||
os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("COMPOSIO_ENABLED", "TRUE")
|
||||
os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel")
|
||||
|
||||
os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
|
||||
os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
|
||||
os.environ.setdefault(
|
||||
"CONFLUENCE_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/confluence/connector/callback",
|
||||
)
|
||||
os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id")
|
||||
os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret")
|
||||
os.environ.setdefault(
|
||||
"NOTION_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/notion/connector/callback",
|
||||
)
|
||||
os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id")
|
||||
os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret")
|
||||
os.environ.setdefault(
|
||||
"ONEDRIVE_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/onedrive/connector/callback",
|
||||
)
|
||||
os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key")
|
||||
os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret")
|
||||
os.environ.setdefault(
|
||||
"DROPBOX_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/dropbox/connector/callback",
|
||||
)
|
||||
# Native Google OAuth — fake Flow in tests.e2e.fakes.native_google raises
|
||||
# "Fake Google Flow requires redirect_uri." when these are empty.
|
||||
os.environ.setdefault(
|
||||
"GOOGLE_DRIVE_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/google/drive/connector/callback",
|
||||
)
|
||||
os.environ.setdefault(
|
||||
"GOOGLE_GMAIL_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/google/gmail/connector/callback",
|
||||
)
|
||||
os.environ.setdefault(
|
||||
"GOOGLE_CALENDAR_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/google/calendar/connector/callback",
|
||||
)
|
||||
os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id"
|
||||
os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret"
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
logger = logging.getLogger("surfsense.e2e.celery")
|
||||
logger.warning("*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2.5) Materialise the synthetic global_llm_config.yaml so the worker's
|
||||
# view of app.config.GLOBAL_LLM_CONFIGS matches the API container.
|
||||
# Must run BEFORE the production celery_app import below, which
|
||||
# transitively imports app.config. Install-only-if-missing so a
|
||||
# developer's local config (with real API keys) is preserved.
|
||||
# ---------------------------------------------------------------------------
|
||||
import shutil as _shutil # noqa: E402
|
||||
|
||||
_e2e_llm_cfg_src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml")
|
||||
_e2e_llm_cfg_dst = os.path.join(
|
||||
_BACKEND_ROOT, "app", "config", "global_llm_config.yaml"
|
||||
)
|
||||
if not os.path.exists(_e2e_llm_cfg_src):
|
||||
raise RuntimeError(
|
||||
f"E2E synthetic global LLM config fixture missing at {_e2e_llm_cfg_src!r}. "
|
||||
f"Restore tests/e2e/fixtures/global_llm_config.yaml from VCS."
|
||||
)
|
||||
if os.path.exists(_e2e_llm_cfg_dst):
|
||||
logger.info(
|
||||
"[e2e-global-llm-config] %s already exists; leaving it alone "
|
||||
"(local dev config preserved)",
|
||||
_e2e_llm_cfg_dst,
|
||||
)
|
||||
else:
|
||||
os.makedirs(os.path.dirname(_e2e_llm_cfg_dst), exist_ok=True)
|
||||
_shutil.copyfile(_e2e_llm_cfg_src, _e2e_llm_cfg_dst)
|
||||
logger.info(
|
||||
"[e2e-global-llm-config] installed %s -> %s",
|
||||
_e2e_llm_cfg_src,
|
||||
_e2e_llm_cfg_dst,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3) Import the production celery_app. All task modules load here.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 4) Patch LLM + embedding bindings inside the worker process.
|
||||
# ---------------------------------------------------------------------------
|
||||
from unittest.mock import patch # noqa: E402
|
||||
|
||||
from app.celery_app import celery_app # noqa: E402
|
||||
from tests.e2e.fakes import ( # noqa: E402
|
||||
clickup_module as _fake_clickup_module,
|
||||
confluence_indexer as _fake_confluence_indexer,
|
||||
confluence_oauth as _fake_confluence_oauth,
|
||||
docling_service as _fake_docling_service,
|
||||
dropbox_api as _fake_dropbox_api,
|
||||
embeddings as _fake_embeddings,
|
||||
jira_module as _fake_jira_module,
|
||||
linear_module as _fake_linear_module,
|
||||
mcp_oauth_runtime as _fake_mcp_oauth_runtime,
|
||||
mcp_runtime as _fake_mcp_runtime,
|
||||
native_google as _fake_native_google,
|
||||
notion_module as _fake_notion_module,
|
||||
onedrive_graph as _fake_onedrive_graph,
|
||||
slack_module as _fake_slack_module,
|
||||
)
|
||||
from tests.e2e.fakes.chat_llm import ( # noqa: E402
|
||||
fake_create_chat_litellm_from_agent_config,
|
||||
fake_create_chat_litellm_from_config,
|
||||
)
|
||||
from tests.e2e.fakes.llm import fake_get_user_long_context_llm # noqa: E402
|
||||
|
||||
# Patches started during bootstrap are kept alive for the lifetime of the
|
||||
# process. We never call .stop() on them.
|
||||
_active_patches: list = []
|
||||
|
||||
|
||||
def _hijack_external_sdks() -> None:
|
||||
"""Replace composio + notion_client in sys.modules.
|
||||
|
||||
Production does ``from composio import Composio`` and
|
||||
``import notion_client`` at import time. With this hijack in place,
|
||||
those imports resolve to our strict fakes.
|
||||
|
||||
MUST run before _import_celery_app().
|
||||
"""
|
||||
import tests.e2e.fakes.composio_module as _fake_composio
|
||||
import tests.e2e.fakes.notion_module as _fake_notion
|
||||
|
||||
sys.modules["composio"] = _fake_composio
|
||||
sys.modules["notion_client"] = _fake_notion
|
||||
sys.modules["notion_client.errors"] = _fake_notion.errors
|
||||
|
||||
|
||||
def _load_dotenv_and_set_env_defaults() -> None:
|
||||
"""Load .env and set every env var the production config reads on import.
|
||||
|
||||
MUST run before _import_celery_app(), since app.config consumes
|
||||
these values at import time.
|
||||
"""
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
os.environ.setdefault(
|
||||
"DATABASE_URL",
|
||||
"postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
|
||||
)
|
||||
os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
|
||||
os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production")
|
||||
os.environ.setdefault("AUTH_TYPE", "LOCAL")
|
||||
os.environ.setdefault("REGISTRATION_ENABLED", "TRUE")
|
||||
os.environ.setdefault("ETL_SERVICE", "DOCLING")
|
||||
os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
||||
os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000")
|
||||
|
||||
# Sentinel keys — fakes never read them; turns leaked real calls into 401s.
|
||||
os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("COMPOSIO_ENABLED", "TRUE")
|
||||
os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel")
|
||||
|
||||
os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
|
||||
os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
|
||||
os.environ.setdefault(
|
||||
"CONFLUENCE_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/confluence/connector/callback",
|
||||
)
|
||||
os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id")
|
||||
os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret")
|
||||
os.environ.setdefault(
|
||||
"NOTION_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/notion/connector/callback",
|
||||
)
|
||||
os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id")
|
||||
os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret")
|
||||
os.environ.setdefault(
|
||||
"ONEDRIVE_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/onedrive/connector/callback",
|
||||
)
|
||||
os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key")
|
||||
os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret")
|
||||
os.environ.setdefault(
|
||||
"DROPBOX_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/dropbox/connector/callback",
|
||||
)
|
||||
# Native Google OAuth — fake Flow in tests.e2e.fakes.native_google raises
|
||||
# "Fake Google Flow requires redirect_uri." when these are empty.
|
||||
os.environ.setdefault(
|
||||
"GOOGLE_DRIVE_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/google/drive/connector/callback",
|
||||
)
|
||||
os.environ.setdefault(
|
||||
"GOOGLE_GMAIL_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/google/gmail/connector/callback",
|
||||
)
|
||||
os.environ.setdefault(
|
||||
"GOOGLE_CALENDAR_REDIRECT_URI",
|
||||
"http://localhost:8000/api/v1/auth/google/calendar/connector/callback",
|
||||
)
|
||||
os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id"
|
||||
os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret"
|
||||
|
||||
|
||||
def _install_synthetic_global_llm_config() -> None:
|
||||
"""Materialise a fake ``app/config/global_llm_config.yaml`` for E2E.
|
||||
|
||||
The real file is gitignored (production operators ship their own with
|
||||
real API keys), so a fresh CI checkout has no YAML at the path
|
||||
``app.config.load_global_llm_configs()`` reads. With an empty
|
||||
``GLOBAL_LLM_CONFIGS`` list, the worker's view of the config diverges
|
||||
from the API container.
|
||||
|
||||
We copy the synthetic fixture from ``tests/e2e/fixtures/`` into the
|
||||
production-expected location BEFORE _import_celery_app() so
|
||||
``app.config`` picks it up on import. Install-only-if-missing so a
|
||||
developer's local config (with real API keys) is preserved.
|
||||
|
||||
MUST run before _import_celery_app().
|
||||
"""
|
||||
import shutil
|
||||
|
||||
src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml")
|
||||
dst = os.path.join(
|
||||
_BACKEND_ROOT, "app", "config", "global_llm_config.yaml"
|
||||
)
|
||||
|
||||
if not os.path.exists(src):
|
||||
raise RuntimeError(
|
||||
f"E2E synthetic global LLM config fixture missing at {src!r}. "
|
||||
f"Restore tests/e2e/fixtures/global_llm_config.yaml from VCS."
|
||||
)
|
||||
|
||||
if os.path.exists(dst):
|
||||
logger.info(
|
||||
"[e2e-global-llm-config] %s already exists; leaving it alone "
|
||||
"(local dev config preserved)",
|
||||
dst,
|
||||
)
|
||||
return
|
||||
|
||||
os.makedirs(os.path.dirname(dst), exist_ok=True)
|
||||
shutil.copyfile(src, dst)
|
||||
logger.info("[e2e-global-llm-config] installed %s -> %s", src, dst)
|
||||
|
||||
|
||||
def _import_celery_app():
|
||||
"""Import and return the production Celery app.
|
||||
|
||||
Every module under ``app.*`` (including all task modules) loads here,
|
||||
creating their bindings. The LLM/embedding factories captured at this
|
||||
point will be replaced by patches in _patch_llm_bindings() below.
|
||||
"""
|
||||
from app.celery_app import celery_app
|
||||
|
||||
return celery_app
|
||||
|
||||
|
||||
def _patch_llm_bindings() -> None:
|
||||
"""Replace LLM factories at every known binding site in worker tasks."""
|
||||
from unittest.mock import patch
|
||||
|
||||
from tests.e2e.fakes.chat_llm import (
|
||||
fake_create_chat_litellm_from_agent_config,
|
||||
fake_create_chat_litellm_from_config,
|
||||
)
|
||||
from tests.e2e.fakes.llm import fake_get_user_long_context_llm
|
||||
|
||||
targets = [
|
||||
"app.services.llm_service.get_user_long_context_llm",
|
||||
"app.tasks.connector_indexers.confluence_indexer.get_user_long_context_llm",
|
||||
|
|
@ -244,31 +244,78 @@ def _patch_llm_bindings() -> None:
|
|||
)
|
||||
|
||||
|
||||
_patch_llm_bindings()
|
||||
_fake_embeddings.install(_active_patches)
|
||||
_fake_docling_service.install(_active_patches)
|
||||
_fake_confluence_oauth.install(_active_patches)
|
||||
_fake_confluence_indexer.install(_active_patches)
|
||||
_fake_native_google.install(_active_patches)
|
||||
_fake_onedrive_graph.install(_active_patches)
|
||||
_fake_dropbox_api.install(_active_patches)
|
||||
_fake_notion_module.install(_active_patches)
|
||||
_fake_linear_module.install(_active_patches)
|
||||
_fake_jira_module.install(_active_patches)
|
||||
_fake_clickup_module.install(_active_patches)
|
||||
_fake_mcp_runtime.install(_active_patches)
|
||||
_fake_mcp_oauth_runtime.install(_active_patches)
|
||||
_fake_slack_module.install(_active_patches)
|
||||
def _install_runtime_fakes() -> None:
|
||||
"""Run each fake's install() against the active patch stack."""
|
||||
from tests.e2e.fakes import (
|
||||
clickup_module as _fake_clickup_module,
|
||||
confluence_indexer as _fake_confluence_indexer,
|
||||
confluence_oauth as _fake_confluence_oauth,
|
||||
docling_service as _fake_docling_service,
|
||||
dropbox_api as _fake_dropbox_api,
|
||||
embeddings as _fake_embeddings,
|
||||
jira_module as _fake_jira_module,
|
||||
linear_module as _fake_linear_module,
|
||||
mcp_oauth_runtime as _fake_mcp_oauth_runtime,
|
||||
mcp_runtime as _fake_mcp_runtime,
|
||||
native_google as _fake_native_google,
|
||||
notion_module as _fake_notion_module,
|
||||
onedrive_graph as _fake_onedrive_graph,
|
||||
slack_module as _fake_slack_module,
|
||||
)
|
||||
|
||||
_fake_embeddings.install(_active_patches)
|
||||
_fake_docling_service.install(_active_patches)
|
||||
_fake_confluence_oauth.install(_active_patches)
|
||||
_fake_confluence_indexer.install(_active_patches)
|
||||
_fake_native_google.install(_active_patches)
|
||||
_fake_onedrive_graph.install(_active_patches)
|
||||
_fake_dropbox_api.install(_active_patches)
|
||||
_fake_notion_module.install(_active_patches)
|
||||
_fake_linear_module.install(_active_patches)
|
||||
_fake_jira_module.install(_active_patches)
|
||||
_fake_clickup_module.install(_active_patches)
|
||||
_fake_mcp_runtime.install(_active_patches)
|
||||
_fake_mcp_oauth_runtime.install(_active_patches)
|
||||
_fake_slack_module.install(_active_patches)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 5) Start the worker.
|
||||
# ---------------------------------------------------------------------------
|
||||
def _bootstrap():
|
||||
"""Run the full E2E bootstrap and return the production Celery app.
|
||||
|
||||
Ordering is load-bearing:
|
||||
1) Hijack composio + notion_client in sys.modules.
|
||||
2) Load .env + set env defaults (app.config reads env on import).
|
||||
3) Configure logging.
|
||||
4) Materialise the synthetic global_llm_config.yaml so the worker's
|
||||
view of GLOBAL_LLM_CONFIGS matches the API container.
|
||||
5) Import production celery_app (which transitively imports the
|
||||
now-faked external SDKs and reads the env defaults + YAML).
|
||||
6) Patch LLM / embedding bindings at every consumer site.
|
||||
7) Install runtime fakes for connectors and chat backends.
|
||||
"""
|
||||
_hijack_external_sdks()
|
||||
_load_dotenv_and_set_env_defaults()
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
logger.warning(
|
||||
"*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***"
|
||||
)
|
||||
|
||||
_install_synthetic_global_llm_config()
|
||||
celery_app = _import_celery_app()
|
||||
_patch_llm_bindings()
|
||||
_install_runtime_fakes()
|
||||
return celery_app
|
||||
|
||||
|
||||
celery_app = _bootstrap()
|
||||
|
||||
|
||||
def _main() -> None:
|
||||
# Default queues mirror production (default queue + connectors queue
|
||||
# so Drive indexing tasks are picked up).
|
||||
queue_name = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
|
||||
queues = f"{queue_name},{queue_name}.connectors"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue