From f2e62a09b333a2c79f34cb9f33ffa4db5eb4223e Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 7 May 2026 02:52:14 +0530 Subject: [PATCH] test(backend): add deterministic Composio Gmail E2E fakes --- surfsense_backend/tests/e2e/fakes/chat_llm.py | 33 ++++++++++-- .../tests/e2e/fakes/composio_module.py | 52 ++++++++++++++++++- .../e2e/fakes/fixtures/gmail_messages.json | 34 ++++++++++++ 3 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 surfsense_backend/tests/e2e/fakes/fixtures/gmail_messages.json diff --git a/surfsense_backend/tests/e2e/fakes/chat_llm.py b/surfsense_backend/tests/e2e/fakes/chat_llm.py index eef4e61ad..06948d769 100644 --- a/surfsense_backend/tests/e2e/fakes/chat_llm.py +++ b/surfsense_backend/tests/e2e/fakes/chat_llm.py @@ -13,6 +13,7 @@ from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult DRIVE_CANARY_TOKEN = "SURFSENSE_E2E_CANARY_TOKEN_DRIVE_001" +GMAIL_CANARY_TOKEN = "SURFSENSE_E2E_CANARY_TOKEN_GMAIL_001" NO_RELEVANT_CONTENT_SENTINEL = "No relevant indexed content found." NO_RELEVANT_CONTENT_QUERY = "E2E_NO_RELEVANT_CONTENT_SMOKE" @@ -36,6 +37,11 @@ def _messages_to_text(messages: list[BaseMessage]) -> str: return "\n".join(_content_to_text(message.content) for message in messages) +def _contains_any(text: str, needles: tuple[str, ...]) -> bool: + lowered = text.lower() + return any(needle.lower() in lowered for needle in needles) + + class FakeChatLLM(BaseChatModel): @property def _llm_type(self) -> str: @@ -57,11 +63,32 @@ class FakeChatLLM(BaseChatModel): return NO_RELEVANT_CONTENT_SENTINEL prompt_text = _messages_to_text(messages) - if ( - "e2e-canary" in prompt_text + wants_gmail = _contains_any( + latest_human, + ("gmail", "email", "message", "E2E Canary Email"), + ) + wants_drive = _contains_any( + latest_human, + ("drive", "file", "e2e-canary.txt"), + ) + has_gmail_evidence = ( + "E2E Canary Email" in prompt_text + or "fake-msg-canary-001" in prompt_text + or GMAIL_CANARY_TOKEN in prompt_text + ) + has_drive_evidence = ( + "e2e-canary.txt" in prompt_text or "fake-file-canary" in prompt_text or DRIVE_CANARY_TOKEN in prompt_text - ): + ) + + if wants_gmail and has_gmail_evidence: + return f"Gmail content found: {GMAIL_CANARY_TOKEN}" + if wants_drive and has_drive_evidence: + return f"Drive content found: {DRIVE_CANARY_TOKEN}" + if has_gmail_evidence and not has_drive_evidence: + return f"Gmail content found: {GMAIL_CANARY_TOKEN}" + if has_drive_evidence and not has_gmail_evidence: return f"Drive content found: {DRIVE_CANARY_TOKEN}" return NO_RELEVANT_CONTENT_SENTINEL diff --git a/surfsense_backend/tests/e2e/fakes/composio_module.py b/surfsense_backend/tests/e2e/fakes/composio_module.py index 87e1968ba..b038c3d78 100644 --- a/surfsense_backend/tests/e2e/fakes/composio_module.py +++ b/surfsense_backend/tests/e2e/fakes/composio_module.py @@ -29,17 +29,25 @@ logger = logging.getLogger(__name__) # Fixture loading # --------------------------------------------------------------------------- -_FIXTURE_PATH = Path(__file__).parent / "fixtures" / "drive_files.json" +_DRIVE_FIXTURE_PATH = Path(__file__).parent / "fixtures" / "drive_files.json" +_GMAIL_FIXTURE_PATH = Path(__file__).parent / "fixtures" / "gmail_messages.json" _DRIVE_DOWNLOAD_DIR = Path("/tmp/surfsense-e2e-composio-downloads") def _load_drive_fixture() -> dict[str, Any]: """Load the canned Drive fixture once per process.""" - with _FIXTURE_PATH.open() as f: + with _DRIVE_FIXTURE_PATH.open() as f: + return json.load(f) + + +def _load_gmail_fixture() -> dict[str, Any]: + """Load the canned Gmail fixture once per process.""" + with _GMAIL_FIXTURE_PATH.open() as f: return json.load(f) _DRIVE_FIXTURE = _load_drive_fixture() +_GMAIL_FIXTURE = _load_gmail_fixture() def _get_scenario() -> str: @@ -278,6 +286,10 @@ class _Tools(_StrictFakeMixin): return {"data": {"user": {"emailAddress": "e2e-fake@surfsense.example"}}} if slug == "GMAIL_GET_PROFILE": return {"data": {"emailAddress": "e2e-fake@surfsense.example"}} + if slug == "GMAIL_FETCH_EMAILS": + return _gmail_fetch_emails(args) + if slug == "GMAIL_FETCH_MESSAGE_BY_MESSAGE_ID": + return _gmail_fetch_message_by_message_id(args) if slug == "GOOGLECALENDAR_CALENDARS_LIST": return { "data": { @@ -414,6 +426,42 @@ def _drive_get_metadata(args: dict[str, Any]) -> dict[str, Any]: ) +# --------------------------------------------------------------------------- +# Gmail tool handlers +# --------------------------------------------------------------------------- + + +def _gmail_fetch_emails(args: dict[str, Any]) -> dict[str, Any]: + """Mimic GMAIL_FETCH_EMAILS. + + The production indexer uses this as a list page, then calls + GMAIL_FETCH_MESSAGE_BY_MESSAGE_ID for the full body of each id. + """ + del args + messages = list(_GMAIL_FIXTURE.get("messages", [])) + return { + "data": { + "messages": messages, + "nextPageToken": None, + "resultSizeEstimate": len(messages), + } + } + + +def _gmail_fetch_message_by_message_id(args: dict[str, Any]) -> dict[str, Any]: + """Mimic GMAIL_FETCH_MESSAGE_BY_MESSAGE_ID.""" + message_id = args.get("message_id", "") + details = _GMAIL_FIXTURE.get("details", {}) + detail = details.get(message_id) + if detail is None: + raise NotImplementedError( + f"E2E Composio fake has no Gmail detail fixture for " + f"message_id={message_id!r}. Add it under 'details' in " + f"surfsense_backend/tests/e2e/fakes/fixtures/gmail_messages.json." + ) + return {"data": detail} + + # --------------------------------------------------------------------------- # Errors # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/e2e/fakes/fixtures/gmail_messages.json b/surfsense_backend/tests/e2e/fakes/fixtures/gmail_messages.json new file mode 100644 index 000000000..c86dee404 --- /dev/null +++ b/surfsense_backend/tests/e2e/fakes/fixtures/gmail_messages.json @@ -0,0 +1,34 @@ +{ + "messages": [ + { + "id": "fake-msg-canary-001", + "threadId": "fake-thread-canary-001", + "snippet": "E2E canary email body is loaded through the Gmail detail endpoint." + }, + { + "id": "fake-msg-planning-001", + "threadId": "fake-thread-planning-001", + "snippet": "Planning email used to keep Gmail fixtures representative." + } + ], + "details": { + "fake-msg-canary-001": { + "id": "fake-msg-canary-001", + "threadId": "fake-thread-canary-001", + "subject": "E2E Canary Email", + "from": "sender@surfsense.example", + "to": "e2e-fake@surfsense.example", + "date": "Mon, 10 Feb 2025 11:00:00 +0000", + "messageText": "This Gmail message body proves the Composio Gmail indexer fetched message details. SURFSENSE_E2E_CANARY_TOKEN_GMAIL_001" + }, + "fake-msg-planning-001": { + "id": "fake-msg-planning-001", + "threadId": "fake-thread-planning-001", + "subject": "E2E Planning Notes", + "from": "planner@surfsense.example", + "to": "e2e-fake@surfsense.example", + "date": "Tue, 11 Feb 2025 09:30:00 +0000", + "messageText": "Planning notes for a non-canary Gmail fixture." + } + } +}