chore: modify E2E test configuration by updating global LLM model IDs to negative values for improved test isolation

2026-05-19 18:45:15 +02:00 · 2026-05-12 03:30:01 +05:30 · 2026-05-12 03:30:01 +05:30 · bed2041a1b
commit bed2041a1b
parent 0b9fc00663
2 changed files with 225 additions and 178 deletions
--- a/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml
+++ b/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml
@@ -38,7 +38,7 @@ router_settings:
  cooldown_time: 1
 global_llm_configs:
-  - id: 1001
+  - id: -9001
    name: "E2E Fake Auto Model (premium)"
    billing_tier: "premium"
    anonymous_enabled: false
@@ -54,7 +54,7 @@ global_llm_configs:
    litellm_params:
      model: "openai/fake-e2e-model-premium"
-  - id: 1002
+  - id: -9002
    name: "E2E Fake Auto Model (free)"
    billing_tier: "free"
    anonymous_enabled: false
--- a/surfsense_backend/tests/e2e/run_celery.py
+++ b/surfsense_backend/tests/e2e/run_celery.py
@@ -25,23 +25,37 @@ if _BACKEND_ROOT not in sys.path:
    sys.path.insert(0, _BACKEND_ROOT)
-# ---------------------------------------------------------------------------
+logger = logging.getLogger("surfsense.e2e.celery")
 # 1) Hijack sys.modules BEFORE production celery imports anything.
 # ---------------------------------------------------------------------------
-import tests.e2e.fakes.composio_module as _fake_composio  # noqa: E402
+# Patches started during bootstrap are kept alive for the lifetime of the
-import tests.e2e.fakes.notion_module as _fake_notion  # noqa: E402
+# process. We never call .stop() on them.
 _active_patches: list = []
 def _hijack_external_sdks() -> None:
    """Replace composio + notion_client in sys.modules.
    Production does ``from composio import Composio`` and
    ``import notion_client`` at import time. With this hijack in place,
    those imports resolve to our strict fakes.
    MUST run before _import_celery_app().
    """
    import tests.e2e.fakes.composio_module as _fake_composio
    import tests.e2e.fakes.notion_module as _fake_notion
    sys.modules["composio"] = _fake_composio
    sys.modules["notion_client"] = _fake_notion
    sys.modules["notion_client.errors"] = _fake_notion.errors
-# ---------------------------------------------------------------------------
+def _load_dotenv_and_set_env_defaults() -> None:
-# 2) Logging + dotenv.
+    """Load .env and set every env var the production config reads on import.
 # ---------------------------------------------------------------------------
-from dotenv import load_dotenv  # noqa: E402
+    MUST run before _import_celery_app(), since app.config consumes
    these values at import time.
    """
    from dotenv import load_dotenv
    load_dotenv()
@@ -108,85 +122,71 @@ os.environ.setdefault(
    os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id"
    os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret"
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
 )
 logger = logging.getLogger("surfsense.e2e.celery")
 logger.warning("*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***")
 def _install_synthetic_global_llm_config() -> None:
    """Materialise a fake ``app/config/global_llm_config.yaml`` for E2E.
-# ---------------------------------------------------------------------------
+    The real file is gitignored (production operators ship their own with
-# 2.5) Materialise the synthetic global_llm_config.yaml so the worker's
+    real API keys), so a fresh CI checkout has no YAML at the path
-#      view of app.config.GLOBAL_LLM_CONFIGS matches the API container.
+    ``app.config.load_global_llm_configs()`` reads. With an empty
-#      Must run BEFORE the production celery_app import below, which
+    ``GLOBAL_LLM_CONFIGS`` list, the worker's view of the config diverges
-#      transitively imports app.config. Install-only-if-missing so a
+    from the API container.
 #      developer's local config (with real API keys) is preserved.
 # ---------------------------------------------------------------------------
 import shutil as _shutil  # noqa: E402
-_e2e_llm_cfg_src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml")
+    We copy the synthetic fixture from ``tests/e2e/fixtures/`` into the
-_e2e_llm_cfg_dst = os.path.join(
+    production-expected location BEFORE _import_celery_app() so
    ``app.config`` picks it up on import. Install-only-if-missing so a
    developer's local config (with real API keys) is preserved.
    MUST run before _import_celery_app().
    """
    import shutil
    src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml")
    dst = os.path.join(
        _BACKEND_ROOT, "app", "config", "global_llm_config.yaml"
    )
-if not os.path.exists(_e2e_llm_cfg_src):
+
    if not os.path.exists(src):
        raise RuntimeError(
-        f"E2E synthetic global LLM config fixture missing at {_e2e_llm_cfg_src!r}. "
+            f"E2E synthetic global LLM config fixture missing at {src!r}. "
            f"Restore tests/e2e/fixtures/global_llm_config.yaml from VCS."
        )
-if os.path.exists(_e2e_llm_cfg_dst):
+
    if os.path.exists(dst):
        logger.info(
            "[e2e-global-llm-config] %s already exists; leaving it alone "
            "(local dev config preserved)",
-        _e2e_llm_cfg_dst,
+            dst,
    )
 else:
    os.makedirs(os.path.dirname(_e2e_llm_cfg_dst), exist_ok=True)
    _shutil.copyfile(_e2e_llm_cfg_src, _e2e_llm_cfg_dst)
    logger.info(
        "[e2e-global-llm-config] installed %s -> %s",
        _e2e_llm_cfg_src,
        _e2e_llm_cfg_dst,
        )
        return
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    shutil.copyfile(src, dst)
    logger.info("[e2e-global-llm-config] installed %s -> %s", src, dst)
-# ---------------------------------------------------------------------------
+def _import_celery_app():
-# 3) Import the production celery_app. All task modules load here.
+    """Import and return the production Celery app.
 # ---------------------------------------------------------------------------
-# ---------------------------------------------------------------------------
+    Every module under ``app.*`` (including all task modules) loads here,
-# 4) Patch LLM + embedding bindings inside the worker process.
+    creating their bindings. The LLM/embedding factories captured at this
-# ---------------------------------------------------------------------------
+    point will be replaced by patches in _patch_llm_bindings() below.
-from unittest.mock import patch  # noqa: E402
+    """
    from app.celery_app import celery_app
-from app.celery_app import celery_app  # noqa: E402
+    return celery_app
 from tests.e2e.fakes import (  # noqa: E402
    clickup_module as _fake_clickup_module,
    confluence_indexer as _fake_confluence_indexer,
    confluence_oauth as _fake_confluence_oauth,
    docling_service as _fake_docling_service,
    dropbox_api as _fake_dropbox_api,
    embeddings as _fake_embeddings,
    jira_module as _fake_jira_module,
    linear_module as _fake_linear_module,
    mcp_oauth_runtime as _fake_mcp_oauth_runtime,
    mcp_runtime as _fake_mcp_runtime,
    native_google as _fake_native_google,
    notion_module as _fake_notion_module,
    onedrive_graph as _fake_onedrive_graph,
    slack_module as _fake_slack_module,
 )
 from tests.e2e.fakes.chat_llm import (  # noqa: E402
    fake_create_chat_litellm_from_agent_config,
    fake_create_chat_litellm_from_config,
 )
 from tests.e2e.fakes.llm import fake_get_user_long_context_llm  # noqa: E402
 _active_patches: list = []
 def _patch_llm_bindings() -> None:
    """Replace LLM factories at every known binding site in worker tasks."""
    from unittest.mock import patch
    from tests.e2e.fakes.chat_llm import (
        fake_create_chat_litellm_from_agent_config,
        fake_create_chat_litellm_from_config,
    )
    from tests.e2e.fakes.llm import fake_get_user_long_context_llm
    targets = [
        "app.services.llm_service.get_user_long_context_llm",
        "app.tasks.connector_indexers.confluence_indexer.get_user_long_context_llm",
@@ -244,7 +244,25 @@ def _patch_llm_bindings() -> None:
            )
-_patch_llm_bindings()
+def _install_runtime_fakes() -> None:
    """Run each fake's install() against the active patch stack."""
    from tests.e2e.fakes import (
        clickup_module as _fake_clickup_module,
        confluence_indexer as _fake_confluence_indexer,
        confluence_oauth as _fake_confluence_oauth,
        docling_service as _fake_docling_service,
        dropbox_api as _fake_dropbox_api,
        embeddings as _fake_embeddings,
        jira_module as _fake_jira_module,
        linear_module as _fake_linear_module,
        mcp_oauth_runtime as _fake_mcp_oauth_runtime,
        mcp_runtime as _fake_mcp_runtime,
        native_google as _fake_native_google,
        notion_module as _fake_notion_module,
        onedrive_graph as _fake_onedrive_graph,
        slack_module as _fake_slack_module,
    )
    _fake_embeddings.install(_active_patches)
    _fake_docling_service.install(_active_patches)
    _fake_confluence_oauth.install(_active_patches)
@@ -261,14 +279,43 @@ _fake_mcp_oauth_runtime.install(_active_patches)
    _fake_slack_module.install(_active_patches)
-# ---------------------------------------------------------------------------
+def _bootstrap():
-# 5) Start the worker.
+    """Run the full E2E bootstrap and return the production Celery app.
-# ---------------------------------------------------------------------------
+
    Ordering is load-bearing:
      1) Hijack composio + notion_client in sys.modules.
      2) Load .env + set env defaults (app.config reads env on import).
      3) Configure logging.
      4) Materialise the synthetic global_llm_config.yaml so the worker's
         view of GLOBAL_LLM_CONFIGS matches the API container.
      5) Import production celery_app (which transitively imports the
         now-faked external SDKs and reads the env defaults + YAML).
      6) Patch LLM / embedding bindings at every consumer site.
      7) Install runtime fakes for connectors and chat backends.
    """
    _hijack_external_sdks()
    _load_dotenv_and_set_env_defaults()
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    logger.warning(
        "*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***"
    )
    _install_synthetic_global_llm_config()
    celery_app = _import_celery_app()
    _patch_llm_bindings()
    _install_runtime_fakes()
    return celery_app
 celery_app = _bootstrap()
 def _main() -> None:
    # Default queues mirror production (default queue + connectors queue
    # so Drive indexing tasks are picked up).
    queue_name = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
    queues = f"{queue_name},{queue_name}.connectors"