test(backend): add deterministic Composio Gmail E2E fakes

This commit is contained in:
Anish Sarkar 2026-05-07 02:52:14 +05:30
parent 23a853b18e
commit f2e62a09b3
3 changed files with 114 additions and 5 deletions

View file

@ -13,6 +13,7 @@ from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
DRIVE_CANARY_TOKEN = "SURFSENSE_E2E_CANARY_TOKEN_DRIVE_001" DRIVE_CANARY_TOKEN = "SURFSENSE_E2E_CANARY_TOKEN_DRIVE_001"
# Canary string that also appears in the canary message body of the Gmail
# fixture; the fake LLM echoes it back when Gmail evidence is in the prompt.
GMAIL_CANARY_TOKEN = "SURFSENSE_E2E_CANARY_TOKEN_GMAIL_001"
NO_RELEVANT_CONTENT_SENTINEL = "No relevant indexed content found." NO_RELEVANT_CONTENT_SENTINEL = "No relevant indexed content found."
NO_RELEVANT_CONTENT_QUERY = "E2E_NO_RELEVANT_CONTENT_SMOKE" NO_RELEVANT_CONTENT_QUERY = "E2E_NO_RELEVANT_CONTENT_SMOKE"
@ -36,6 +37,11 @@ def _messages_to_text(messages: list[BaseMessage]) -> str:
return "\n".join(_content_to_text(message.content) for message in messages) return "\n".join(_content_to_text(message.content) for message in messages)
def _contains_any(text: str, needles: tuple[str, ...]) -> bool:
lowered = text.lower()
return any(needle.lower() in lowered for needle in needles)
class FakeChatLLM(BaseChatModel): class FakeChatLLM(BaseChatModel):
@property @property
def _llm_type(self) -> str: def _llm_type(self) -> str:
@ -57,11 +63,32 @@ class FakeChatLLM(BaseChatModel):
return NO_RELEVANT_CONTENT_SENTINEL return NO_RELEVANT_CONTENT_SENTINEL
prompt_text = _messages_to_text(messages) prompt_text = _messages_to_text(messages)
if ( wants_gmail = _contains_any(
"e2e-canary" in prompt_text latest_human,
("gmail", "email", "message", "E2E Canary Email"),
)
wants_drive = _contains_any(
latest_human,
("drive", "file", "e2e-canary.txt"),
)
has_gmail_evidence = (
"E2E Canary Email" in prompt_text
or "fake-msg-canary-001" in prompt_text
or GMAIL_CANARY_TOKEN in prompt_text
)
has_drive_evidence = (
"e2e-canary.txt" in prompt_text
or "fake-file-canary" in prompt_text or "fake-file-canary" in prompt_text
or DRIVE_CANARY_TOKEN in prompt_text or DRIVE_CANARY_TOKEN in prompt_text
): )
if wants_gmail and has_gmail_evidence:
return f"Gmail content found: {GMAIL_CANARY_TOKEN}"
if wants_drive and has_drive_evidence:
return f"Drive content found: {DRIVE_CANARY_TOKEN}"
if has_gmail_evidence and not has_drive_evidence:
return f"Gmail content found: {GMAIL_CANARY_TOKEN}"
if has_drive_evidence and not has_gmail_evidence:
return f"Drive content found: {DRIVE_CANARY_TOKEN}" return f"Drive content found: {DRIVE_CANARY_TOKEN}"
return NO_RELEVANT_CONTENT_SENTINEL return NO_RELEVANT_CONTENT_SENTINEL

View file

@ -29,17 +29,25 @@ logger = logging.getLogger(__name__)
# Fixture loading # Fixture loading
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_FIXTURE_PATH = Path(__file__).parent / "fixtures" / "drive_files.json" _DRIVE_FIXTURE_PATH = Path(__file__).parent / "fixtures" / "drive_files.json"
_GMAIL_FIXTURE_PATH = Path(__file__).parent / "fixtures" / "gmail_messages.json"
_DRIVE_DOWNLOAD_DIR = Path("/tmp/surfsense-e2e-composio-downloads") _DRIVE_DOWNLOAD_DIR = Path("/tmp/surfsense-e2e-composio-downloads")
def _load_drive_fixture() -> dict[str, Any]:
    """Read and parse the canned Drive fixture at ``_DRIVE_FIXTURE_PATH``.

    The file is re-read on every call; the module calls this once at import
    time and keeps the result in ``_DRIVE_FIXTURE``.

    Returns:
        The decoded top-level JSON object of the fixture file.

    Raises:
        OSError: If the fixture file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale-derived default text encoding.
    with _DRIVE_FIXTURE_PATH.open(encoding="utf-8") as f:
        return json.load(f)
def _load_gmail_fixture() -> dict[str, Any]:
    """Read and parse the canned Gmail fixture at ``_GMAIL_FIXTURE_PATH``.

    The file is re-read on every call; the module calls this once at import
    time and keeps the result in ``_GMAIL_FIXTURE``.

    Returns:
        The decoded top-level JSON object of the fixture file.

    Raises:
        OSError: If the fixture file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale-derived default text encoding.
    with _GMAIL_FIXTURE_PATH.open(encoding="utf-8") as f:
        return json.load(f)
_DRIVE_FIXTURE = _load_drive_fixture() _DRIVE_FIXTURE = _load_drive_fixture()
_GMAIL_FIXTURE = _load_gmail_fixture()
def _get_scenario() -> str: def _get_scenario() -> str:
@ -278,6 +286,10 @@ class _Tools(_StrictFakeMixin):
return {"data": {"user": {"emailAddress": "e2e-fake@surfsense.example"}}} return {"data": {"user": {"emailAddress": "e2e-fake@surfsense.example"}}}
if slug == "GMAIL_GET_PROFILE": if slug == "GMAIL_GET_PROFILE":
return {"data": {"emailAddress": "e2e-fake@surfsense.example"}} return {"data": {"emailAddress": "e2e-fake@surfsense.example"}}
if slug == "GMAIL_FETCH_EMAILS":
return _gmail_fetch_emails(args)
if slug == "GMAIL_FETCH_MESSAGE_BY_MESSAGE_ID":
return _gmail_fetch_message_by_message_id(args)
if slug == "GOOGLECALENDAR_CALENDARS_LIST": if slug == "GOOGLECALENDAR_CALENDARS_LIST":
return { return {
"data": { "data": {
@ -414,6 +426,42 @@ def _drive_get_metadata(args: dict[str, Any]) -> dict[str, Any]:
) )
# ---------------------------------------------------------------------------
# Gmail tool handlers
# ---------------------------------------------------------------------------
def _gmail_fetch_emails(args: dict[str, Any]) -> dict[str, Any]:
    """Mimic GMAIL_FETCH_EMAILS by returning every fixture message in one page.

    The production indexer uses this as a list page, then calls
    GMAIL_FETCH_MESSAGE_BY_MESSAGE_ID for the full body of each id.

    The request arguments are ignored: the same listing is returned for any
    input, with no next-page token.
    """
    del args  # accepted for signature parity with the other handlers; unused
    # Copy the list so callers cannot reorder or extend the fixture's own list
    # (the message dicts themselves are still shared).
    listing = list(_GMAIL_FIXTURE.get("messages", []))
    page = {
        "messages": listing,
        "nextPageToken": None,
        "resultSizeEstimate": len(listing),
    }
    return {"data": page}
def _gmail_fetch_message_by_message_id(args: dict[str, Any]) -> dict[str, Any]:
    """Mimic GMAIL_FETCH_MESSAGE_BY_MESSAGE_ID using the fixture's 'details' map.

    Reads ``args["message_id"]`` (defaulting to an empty string) and returns
    the matching detail record wrapped as ``{"data": detail}``.

    Raises:
        NotImplementedError: If the fixture has no detail entry for the
            requested id, so a missing fixture fails loudly.
    """
    requested_id = args.get("message_id", "")
    known_details = _GMAIL_FIXTURE.get("details", {})
    record = known_details.get(requested_id)
    if record is not None:
        return {"data": record}

    complaint = (
        "E2E Composio fake has no Gmail detail fixture for "
        f"message_id={requested_id!r}. Add it under 'details' in "
        "surfsense_backend/tests/e2e/fakes/fixtures/gmail_messages.json."
    )
    raise NotImplementedError(complaint)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Errors # Errors
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View file

@ -0,0 +1,34 @@
{
"messages": [
{
"id": "fake-msg-canary-001",
"threadId": "fake-thread-canary-001",
"snippet": "E2E canary email body is loaded through the Gmail detail endpoint."
},
{
"id": "fake-msg-planning-001",
"threadId": "fake-thread-planning-001",
"snippet": "Planning email used to keep Gmail fixtures representative."
}
],
"details": {
"fake-msg-canary-001": {
"id": "fake-msg-canary-001",
"threadId": "fake-thread-canary-001",
"subject": "E2E Canary Email",
"from": "sender@surfsense.example",
"to": "e2e-fake@surfsense.example",
"date": "Mon, 10 Feb 2025 11:00:00 +0000",
"messageText": "This Gmail message body proves the Composio Gmail indexer fetched message details. SURFSENSE_E2E_CANARY_TOKEN_GMAIL_001"
},
"fake-msg-planning-001": {
"id": "fake-msg-planning-001",
"threadId": "fake-thread-planning-001",
"subject": "E2E Planning Notes",
"from": "planner@surfsense.example",
"to": "e2e-fake@surfsense.example",
"date": "Tue, 11 Feb 2025 09:30:00 +0000",
"messageText": "Planning notes for a non-canary Gmail fixture."
}
}
}