mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-04 21:32:39 +02:00
390 lines
13 KiB
Python
390 lines
13 KiB
Python
"""
|
|
Composio Connector Module.
|
|
|
|
Provides a unified interface for interacting with various services via Composio,
|
|
primarily used during indexing operations.
|
|
"""
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy.future import select
|
|
|
|
from app.db import SearchSourceConnector
|
|
from app.services.composio_service import INDEXABLE_TOOLKITS, ComposioService
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ComposioConnector:
|
|
"""
|
|
Generic Composio connector for data retrieval.
|
|
|
|
Wraps the ComposioService to provide toolkit-specific data access
|
|
for indexing operations.
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
session: AsyncSession,
|
|
connector_id: int,
|
|
):
|
|
"""
|
|
Initialize the Composio connector.
|
|
|
|
Args:
|
|
session: Database session for updating connector.
|
|
connector_id: ID of the SearchSourceConnector.
|
|
"""
|
|
self._session = session
|
|
self._connector_id = connector_id
|
|
self._service: ComposioService | None = None
|
|
self._connector: SearchSourceConnector | None = None
|
|
self._config: dict[str, Any] | None = None
|
|
|
|
async def _load_connector(self) -> SearchSourceConnector:
|
|
"""Load connector from database."""
|
|
if self._connector is None:
|
|
result = await self._session.execute(
|
|
select(SearchSourceConnector).filter(
|
|
SearchSourceConnector.id == self._connector_id
|
|
)
|
|
)
|
|
self._connector = result.scalars().first()
|
|
if not self._connector:
|
|
raise ValueError(f"Connector {self._connector_id} not found")
|
|
self._config = self._connector.config or {}
|
|
return self._connector
|
|
|
|
async def _get_service(self) -> ComposioService:
|
|
"""Get or create the Composio service instance."""
|
|
if self._service is None:
|
|
self._service = ComposioService()
|
|
return self._service
|
|
|
|
async def get_config(self) -> dict[str, Any]:
|
|
"""Get the connector configuration."""
|
|
await self._load_connector()
|
|
return self._config or {}
|
|
|
|
async def get_toolkit_id(self) -> str:
|
|
"""Get the toolkit ID for this connector."""
|
|
config = await self.get_config()
|
|
return config.get("toolkit_id", "")
|
|
|
|
async def get_connected_account_id(self) -> str | None:
|
|
"""Get the Composio connected account ID."""
|
|
config = await self.get_config()
|
|
return config.get("composio_connected_account_id")
|
|
|
|
async def get_entity_id(self) -> str:
|
|
"""Get the Composio entity ID (user identifier)."""
|
|
await self._load_connector()
|
|
# Entity ID is constructed from the connector's user_id
|
|
return f"surfsense_{self._connector.user_id}"
|
|
|
|
async def is_indexable(self) -> bool:
|
|
"""Check if this connector's toolkit supports indexing."""
|
|
toolkit_id = await self.get_toolkit_id()
|
|
return toolkit_id in INDEXABLE_TOOLKITS
|
|
|
|
# ===== Google Drive Methods =====
|
|
|
|
async def list_drive_files(
|
|
self,
|
|
folder_id: str | None = None,
|
|
page_token: str | None = None,
|
|
page_size: int = 100,
|
|
) -> tuple[list[dict[str, Any]], str | None, str | None]:
|
|
"""
|
|
List files from Google Drive via Composio.
|
|
|
|
Args:
|
|
folder_id: Optional folder ID to list contents of.
|
|
page_token: Pagination token.
|
|
page_size: Number of files per page.
|
|
|
|
Returns:
|
|
Tuple of (files list, next_page_token, error message).
|
|
"""
|
|
connected_account_id = await self.get_connected_account_id()
|
|
if not connected_account_id:
|
|
return [], None, "No connected account ID found"
|
|
|
|
entity_id = await self.get_entity_id()
|
|
service = await self._get_service()
|
|
return await service.get_drive_files(
|
|
connected_account_id=connected_account_id,
|
|
entity_id=entity_id,
|
|
folder_id=folder_id,
|
|
page_token=page_token,
|
|
page_size=page_size,
|
|
)
|
|
|
|
async def get_drive_file_content(
|
|
self, file_id: str
|
|
) -> tuple[bytes | None, str | None]:
|
|
"""
|
|
Download file content from Google Drive via Composio.
|
|
|
|
Args:
|
|
file_id: Google Drive file ID.
|
|
|
|
Returns:
|
|
Tuple of (file content bytes, error message).
|
|
"""
|
|
connected_account_id = await self.get_connected_account_id()
|
|
if not connected_account_id:
|
|
return None, "No connected account ID found"
|
|
|
|
entity_id = await self.get_entity_id()
|
|
service = await self._get_service()
|
|
return await service.get_drive_file_content(
|
|
connected_account_id=connected_account_id,
|
|
entity_id=entity_id,
|
|
file_id=file_id,
|
|
)
|
|
|
|
# ===== Gmail Methods =====
|
|
|
|
async def list_gmail_messages(
|
|
self,
|
|
query: str = "",
|
|
max_results: int = 100,
|
|
) -> tuple[list[dict[str, Any]], str | None]:
|
|
"""
|
|
List Gmail messages via Composio.
|
|
|
|
Args:
|
|
query: Gmail search query.
|
|
max_results: Maximum number of messages.
|
|
|
|
Returns:
|
|
Tuple of (messages list, error message).
|
|
"""
|
|
connected_account_id = await self.get_connected_account_id()
|
|
if not connected_account_id:
|
|
return [], "No connected account ID found"
|
|
|
|
entity_id = await self.get_entity_id()
|
|
service = await self._get_service()
|
|
return await service.get_gmail_messages(
|
|
connected_account_id=connected_account_id,
|
|
entity_id=entity_id,
|
|
query=query,
|
|
max_results=max_results,
|
|
)
|
|
|
|
async def get_gmail_message_detail(
|
|
self, message_id: str
|
|
) -> tuple[dict[str, Any] | None, str | None]:
|
|
"""
|
|
Get full details of a Gmail message via Composio.
|
|
|
|
Args:
|
|
message_id: Gmail message ID.
|
|
|
|
Returns:
|
|
Tuple of (message details, error message).
|
|
"""
|
|
connected_account_id = await self.get_connected_account_id()
|
|
if not connected_account_id:
|
|
return None, "No connected account ID found"
|
|
|
|
entity_id = await self.get_entity_id()
|
|
service = await self._get_service()
|
|
return await service.get_gmail_message_detail(
|
|
connected_account_id=connected_account_id,
|
|
entity_id=entity_id,
|
|
message_id=message_id,
|
|
)
|
|
|
|
# ===== Google Calendar Methods =====
|
|
|
|
async def list_calendar_events(
|
|
self,
|
|
time_min: str | None = None,
|
|
time_max: str | None = None,
|
|
max_results: int = 250,
|
|
) -> tuple[list[dict[str, Any]], str | None]:
|
|
"""
|
|
List Google Calendar events via Composio.
|
|
|
|
Args:
|
|
time_min: Start time (RFC3339 format).
|
|
time_max: End time (RFC3339 format).
|
|
max_results: Maximum number of events.
|
|
|
|
Returns:
|
|
Tuple of (events list, error message).
|
|
"""
|
|
connected_account_id = await self.get_connected_account_id()
|
|
if not connected_account_id:
|
|
return [], "No connected account ID found"
|
|
|
|
entity_id = await self.get_entity_id()
|
|
service = await self._get_service()
|
|
return await service.get_calendar_events(
|
|
connected_account_id=connected_account_id,
|
|
entity_id=entity_id,
|
|
time_min=time_min,
|
|
time_max=time_max,
|
|
max_results=max_results,
|
|
)
|
|
|
|
# ===== Utility Methods =====
|
|
|
|
def format_gmail_message_to_markdown(self, message: dict[str, Any]) -> str:
|
|
"""
|
|
Format a Gmail message to markdown.
|
|
|
|
Args:
|
|
message: Message object from Composio's GMAIL_FETCH_EMAILS response.
|
|
Composio structure: messageId, messageText, messageTimestamp,
|
|
payload.headers, labelIds, attachmentList
|
|
|
|
Returns:
|
|
Formatted markdown string.
|
|
"""
|
|
try:
|
|
# Composio uses 'messageId' (camelCase)
|
|
message_id = message.get("messageId", "") or message.get("id", "")
|
|
label_ids = message.get("labelIds", [])
|
|
|
|
# Extract headers from payload
|
|
payload = message.get("payload", {})
|
|
headers = payload.get("headers", [])
|
|
|
|
# Parse headers into a dict
|
|
header_dict = {}
|
|
for header in headers:
|
|
name = header.get("name", "").lower()
|
|
value = header.get("value", "")
|
|
header_dict[name] = value
|
|
|
|
# Extract key information
|
|
subject = header_dict.get("subject", "No Subject")
|
|
from_email = header_dict.get("from", "Unknown Sender")
|
|
to_email = header_dict.get("to", "Unknown Recipient")
|
|
# Composio provides messageTimestamp directly
|
|
date_str = message.get("messageTimestamp", "") or header_dict.get(
|
|
"date", "Unknown Date"
|
|
)
|
|
|
|
# Build markdown content
|
|
markdown_content = f"# {subject}\n\n"
|
|
markdown_content += f"**From:** {from_email}\n"
|
|
markdown_content += f"**To:** {to_email}\n"
|
|
markdown_content += f"**Date:** {date_str}\n"
|
|
|
|
if label_ids:
|
|
markdown_content += f"**Labels:** {', '.join(label_ids)}\n"
|
|
|
|
markdown_content += "\n---\n\n"
|
|
|
|
# Composio provides full message text in 'messageText'
|
|
message_text = message.get("messageText", "")
|
|
if message_text:
|
|
markdown_content += f"## Content\n\n{message_text}\n\n"
|
|
else:
|
|
# Fallback to snippet if no messageText
|
|
snippet = message.get("snippet", "")
|
|
if snippet:
|
|
markdown_content += f"## Preview\n\n{snippet}\n\n"
|
|
|
|
# Add attachment info if present
|
|
attachments = message.get("attachmentList", [])
|
|
if attachments:
|
|
markdown_content += "## Attachments\n\n"
|
|
for att in attachments:
|
|
att_name = att.get("filename", att.get("name", "Unknown"))
|
|
markdown_content += f"- {att_name}\n"
|
|
markdown_content += "\n"
|
|
|
|
# Add message metadata
|
|
markdown_content += "## Message Details\n\n"
|
|
markdown_content += f"- **Message ID:** {message_id}\n"
|
|
|
|
return markdown_content
|
|
|
|
except Exception as e:
|
|
return f"Error formatting message to markdown: {e!s}"
|
|
|
|
def format_calendar_event_to_markdown(self, event: dict[str, Any]) -> str:
|
|
"""
|
|
Format a Google Calendar event to markdown.
|
|
|
|
Args:
|
|
event: Event object from Google Calendar API.
|
|
|
|
Returns:
|
|
Formatted markdown string.
|
|
"""
|
|
from datetime import datetime
|
|
|
|
try:
|
|
# Extract basic event information
|
|
summary = event.get("summary", "No Title")
|
|
description = event.get("description", "")
|
|
location = event.get("location", "")
|
|
|
|
# Extract start and end times
|
|
start = event.get("start", {})
|
|
end = event.get("end", {})
|
|
|
|
start_time = start.get("dateTime") or start.get("date", "")
|
|
end_time = end.get("dateTime") or end.get("date", "")
|
|
|
|
# Format times for display
|
|
def format_time(time_str: str) -> str:
|
|
if not time_str:
|
|
return "Unknown"
|
|
try:
|
|
if "T" in time_str:
|
|
dt = datetime.fromisoformat(time_str.replace("Z", "+00:00"))
|
|
return dt.strftime("%Y-%m-%d %H:%M")
|
|
return time_str
|
|
except Exception:
|
|
return time_str
|
|
|
|
start_formatted = format_time(start_time)
|
|
end_formatted = format_time(end_time)
|
|
|
|
# Extract attendees
|
|
attendees = event.get("attendees", [])
|
|
attendee_list = []
|
|
for attendee in attendees:
|
|
email = attendee.get("email", "")
|
|
display_name = attendee.get("displayName", email)
|
|
response_status = attendee.get("responseStatus", "")
|
|
attendee_list.append(f"- {display_name} ({response_status})")
|
|
|
|
# Build markdown content
|
|
markdown_content = f"# {summary}\n\n"
|
|
markdown_content += f"**Start:** {start_formatted}\n"
|
|
markdown_content += f"**End:** {end_formatted}\n"
|
|
|
|
if location:
|
|
markdown_content += f"**Location:** {location}\n"
|
|
|
|
markdown_content += "\n"
|
|
|
|
if description:
|
|
markdown_content += f"## Description\n\n{description}\n\n"
|
|
|
|
if attendee_list:
|
|
markdown_content += "## Attendees\n\n"
|
|
markdown_content += "\n".join(attendee_list)
|
|
markdown_content += "\n\n"
|
|
|
|
# Add event metadata
|
|
markdown_content += "## Event Details\n\n"
|
|
markdown_content += f"- **Event ID:** {event.get('id', 'Unknown')}\n"
|
|
markdown_content += f"- **Created:** {event.get('created', 'Unknown')}\n"
|
|
markdown_content += f"- **Updated:** {event.get('updated', 'Unknown')}\n"
|
|
|
|
return markdown_content
|
|
|
|
except Exception as e:
|
|
return f"Error formatting event to markdown: {e!s}"
|