mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-28 21:49:40 +02:00
Merge dev into feature/obsidian - resolved conflicts keeping both Obsidian and Composio connectors
This commit is contained in:
commit
f0760c14e9
29 changed files with 6297 additions and 3110 deletions
|
|
@ -85,6 +85,11 @@ TEAMS_CLIENT_ID=your_teams_client_id_here
|
|||
TEAMS_CLIENT_SECRET=your_teams_client_secret_here
|
||||
TEAMS_REDIRECT_URI=http://localhost:8000/api/v1/auth/teams/connector/callback
|
||||
|
||||
# Composio Connector
|
||||
COMPOSIO_API_KEY=your_api_key_here
|
||||
COMPOSIO_ENABLED=TRUE
|
||||
COMPOSIO_REDIRECT_URI=http://localhost:8000/api/v1/auth/composio/connector/callback
|
||||
|
||||
# Embedding Model
|
||||
# Examples:
|
||||
# # Get sentence transformers embeddings
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
"""Add COMPOSIO_CONNECTOR to SearchSourceConnectorType and DocumentType enums
|
||||
|
||||
Revision ID: 74
|
||||
Revises: 73
|
||||
Create Date: 2026-01-21
|
||||
|
||||
This migration adds the COMPOSIO_CONNECTOR enum value to both:
|
||||
- searchsourceconnectortype (for connector type tracking)
|
||||
- documenttype (for document type tracking)
|
||||
|
||||
Composio is a managed OAuth integration service that allows connecting
|
||||
to various third-party services (Google Drive, Gmail, Calendar, etc.)
|
||||
without requiring separate OAuth app verification.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "74"
|
||||
down_revision: str | None = "73"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
# Define the ENUM type names and the new value
|
||||
CONNECTOR_ENUM = "searchsourceconnectortype"
|
||||
CONNECTOR_NEW_VALUE = "COMPOSIO_CONNECTOR"
|
||||
DOCUMENT_ENUM = "documenttype"
|
||||
DOCUMENT_NEW_VALUE = "COMPOSIO_CONNECTOR"
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema - add COMPOSIO_CONNECTOR to connector and document enums safely.

    Each enum is extended with ``ALTER TYPE ... ADD VALUE IF NOT EXISTS``, so
    running the migration against a database that already has the label is a
    no-op instead of an error. Letting PostgreSQL perform the existence check
    also avoids looking the type up by bare name in ``pg_type``, which fails
    when a type with the same name exists in more than one schema.
    """
    enum_additions = (
        # searchsourceconnectortype: connector type tracking
        (CONNECTOR_ENUM, CONNECTOR_NEW_VALUE),
        # documenttype: document type tracking
        (DOCUMENT_ENUM, DOCUMENT_NEW_VALUE),
    )
    for enum_name, new_value in enum_additions:
        # Both names are module-level constants, never user input, so plain
        # string interpolation into the DDL statement is safe here.
        op.execute(f"ALTER TYPE {enum_name} ADD VALUE IF NOT EXISTS '{new_value}'")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema - intentionally does nothing for the COMPOSIO_CONNECTOR values.

    PostgreSQL has no statement for dropping a single value from an enum type.
    Undoing this migration properly would mean:
    1. Deleting every row that uses the COMPOSIO_CONNECTOR value
    2. Creating replacement enums that omit COMPOSIO_CONNECTOR
    3. Switching the affected columns over to the replacement enums
    4. Dropping the original enums

    That procedure is complex and typically not needed in practice, so the
    downgrade is left as a no-op.
    """
    return None
|
||||
|
|
@ -142,6 +142,12 @@ class Config:
|
|||
CLICKUP_CLIENT_SECRET = os.getenv("CLICKUP_CLIENT_SECRET")
|
||||
CLICKUP_REDIRECT_URI = os.getenv("CLICKUP_REDIRECT_URI")
|
||||
|
||||
# Composio Configuration (for managed OAuth integrations)
|
||||
# Get your API key from https://app.composio.dev
|
||||
COMPOSIO_API_KEY = os.getenv("COMPOSIO_API_KEY")
|
||||
COMPOSIO_ENABLED = os.getenv("COMPOSIO_ENABLED", "FALSE").upper() == "TRUE"
|
||||
COMPOSIO_REDIRECT_URI = os.getenv("COMPOSIO_REDIRECT_URI")
|
||||
|
||||
# LLM instances are now managed per-user through the LLMConfig system
|
||||
# Legacy environment variables removed in favor of user-specific configurations
|
||||
|
||||
|
|
|
|||
388
surfsense_backend/app/connectors/composio_connector.py
Normal file
388
surfsense_backend/app/connectors/composio_connector.py
Normal file
|
|
@ -0,0 +1,388 @@
|
|||
"""
|
||||
Composio Connector Module.
|
||||
|
||||
Provides a unified interface for interacting with various services via Composio,
|
||||
primarily used during indexing operations.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
||||
from app.db import SearchSourceConnector
|
||||
from app.services.composio_service import ComposioService, INDEXABLE_TOOLKITS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ComposioConnector:
    """
    Generic Composio connector for data retrieval.

    Wraps the ComposioService to provide toolkit-specific data access
    for indexing operations. The connector row and its config are loaded
    lazily from the database on first use and cached on the instance.
    """

    # Error text returned by every data-access method when the connector
    # config holds no Composio connected account ID.
    _NO_ACCOUNT_ERROR = "No connected account ID found"

    def __init__(
        self,
        session: AsyncSession,
        connector_id: int,
    ):
        """
        Initialize the Composio connector.

        Args:
            session: Database session used to query the connector row.
            connector_id: ID of the SearchSourceConnector.
        """
        self._session = session
        self._connector_id = connector_id
        self._service: ComposioService | None = None
        self._connector: SearchSourceConnector | None = None
        self._config: dict[str, Any] | None = None

    async def _load_connector(self) -> SearchSourceConnector:
        """
        Load the connector row from the database, caching it on the instance.

        Returns:
            The SearchSourceConnector with the configured ID.

        Raises:
            ValueError: If no connector with that ID exists.
        """
        if self._connector is None:
            result = await self._session.execute(
                select(SearchSourceConnector).filter(
                    SearchSourceConnector.id == self._connector_id
                )
            )
            self._connector = result.scalars().first()
            if not self._connector:
                raise ValueError(f"Connector {self._connector_id} not found")
            self._config = self._connector.config or {}
        return self._connector

    async def _get_service(self) -> ComposioService:
        """Get or create the Composio service instance."""
        if self._service is None:
            self._service = ComposioService()
        return self._service

    async def _get_call_context(
        self,
    ) -> tuple[ComposioService, dict[str, str]] | None:
        """
        Resolve what every Composio data call needs.

        Returns:
            ``(service, account_kwargs)`` where ``account_kwargs`` holds the
            ``connected_account_id`` and ``entity_id`` keyword arguments, or
            ``None`` when the connector config has no connected account ID.
        """
        connected_account_id = await self.get_connected_account_id()
        if not connected_account_id:
            return None

        entity_id = await self.get_entity_id()
        service = await self._get_service()
        return service, {
            "connected_account_id": connected_account_id,
            "entity_id": entity_id,
        }

    async def get_config(self) -> dict[str, Any]:
        """Get the connector configuration (empty dict when none is stored)."""
        await self._load_connector()
        return self._config or {}

    async def get_toolkit_id(self) -> str:
        """Get the toolkit ID for this connector ("" when not configured)."""
        config = await self.get_config()
        return config.get("toolkit_id", "")

    async def get_connected_account_id(self) -> str | None:
        """Get the Composio connected account ID, or None when not configured."""
        config = await self.get_config()
        return config.get("composio_connected_account_id")

    async def get_entity_id(self) -> str:
        """Get the Composio entity ID (user identifier)."""
        connector = await self._load_connector()
        # Entity ID is constructed from the connector's user_id
        return f"surfsense_{connector.user_id}"

    async def is_indexable(self) -> bool:
        """Check if this connector's toolkit supports indexing."""
        toolkit_id = await self.get_toolkit_id()
        return toolkit_id in INDEXABLE_TOOLKITS

    # ===== Google Drive Methods =====

    async def list_drive_files(
        self,
        folder_id: str | None = None,
        page_token: str | None = None,
        page_size: int = 100,
    ) -> tuple[list[dict[str, Any]], str | None, str | None]:
        """
        List files from Google Drive via Composio.

        Args:
            folder_id: Optional folder ID to list contents of.
            page_token: Pagination token.
            page_size: Number of files per page.

        Returns:
            Tuple of (files list, next_page_token, error message).
        """
        context = await self._get_call_context()
        if context is None:
            return [], None, self._NO_ACCOUNT_ERROR

        service, account_kwargs = context
        return await service.get_drive_files(
            **account_kwargs,
            folder_id=folder_id,
            page_token=page_token,
            page_size=page_size,
        )

    async def get_drive_file_content(
        self, file_id: str
    ) -> tuple[bytes | None, str | None]:
        """
        Download file content from Google Drive via Composio.

        Args:
            file_id: Google Drive file ID.

        Returns:
            Tuple of (file content bytes, error message).
        """
        context = await self._get_call_context()
        if context is None:
            return None, self._NO_ACCOUNT_ERROR

        service, account_kwargs = context
        return await service.get_drive_file_content(
            **account_kwargs,
            file_id=file_id,
        )

    # ===== Gmail Methods =====

    async def list_gmail_messages(
        self,
        query: str = "",
        max_results: int = 100,
    ) -> tuple[list[dict[str, Any]], str | None]:
        """
        List Gmail messages via Composio.

        Args:
            query: Gmail search query.
            max_results: Maximum number of messages.

        Returns:
            Tuple of (messages list, error message).
        """
        context = await self._get_call_context()
        if context is None:
            return [], self._NO_ACCOUNT_ERROR

        service, account_kwargs = context
        return await service.get_gmail_messages(
            **account_kwargs,
            query=query,
            max_results=max_results,
        )

    async def get_gmail_message_detail(
        self, message_id: str
    ) -> tuple[dict[str, Any] | None, str | None]:
        """
        Get full details of a Gmail message via Composio.

        Args:
            message_id: Gmail message ID.

        Returns:
            Tuple of (message details, error message).
        """
        context = await self._get_call_context()
        if context is None:
            return None, self._NO_ACCOUNT_ERROR

        service, account_kwargs = context
        return await service.get_gmail_message_detail(
            **account_kwargs,
            message_id=message_id,
        )

    # ===== Google Calendar Methods =====

    async def list_calendar_events(
        self,
        time_min: str | None = None,
        time_max: str | None = None,
        max_results: int = 250,
    ) -> tuple[list[dict[str, Any]], str | None]:
        """
        List Google Calendar events via Composio.

        Args:
            time_min: Start time (RFC3339 format).
            time_max: End time (RFC3339 format).
            max_results: Maximum number of events.

        Returns:
            Tuple of (events list, error message).
        """
        context = await self._get_call_context()
        if context is None:
            return [], self._NO_ACCOUNT_ERROR

        service, account_kwargs = context
        return await service.get_calendar_events(
            **account_kwargs,
            time_min=time_min,
            time_max=time_max,
            max_results=max_results,
        )

    # ===== Utility Methods =====

    def format_gmail_message_to_markdown(self, message: dict[str, Any]) -> str:
        """
        Format a Gmail message to markdown.

        Keys that are missing *or* explicitly null (payload, headers,
        labelIds, attachmentList) are treated as empty, so a sparse message
        still renders instead of collapsing into the error string.

        Args:
            message: Message object from Composio's GMAIL_FETCH_EMAILS response.
                Composio structure: messageId, messageText, messageTimestamp,
                payload.headers, labelIds, attachmentList

        Returns:
            Formatted markdown string, or an "Error formatting message to
            markdown: ..." string if the message cannot be processed at all.
        """
        try:
            # Composio uses 'messageId' (camelCase)
            message_id = message.get("messageId", "") or message.get("id", "")
            label_ids = message.get("labelIds") or []

            # Headers arrive as a list of {"name", "value"} pairs; index them
            # by lower-cased name for case-insensitive lookup.
            payload = message.get("payload") or {}
            header_dict = {
                (header.get("name") or "").lower(): header.get("value", "")
                for header in payload.get("headers") or []
            }

            subject = header_dict.get("subject", "No Subject")
            from_email = header_dict.get("from", "Unknown Sender")
            to_email = header_dict.get("to", "Unknown Recipient")
            # Composio provides messageTimestamp directly
            date_str = message.get("messageTimestamp", "") or header_dict.get(
                "date", "Unknown Date"
            )

            parts = [
                f"# {subject}\n\n",
                f"**From:** {from_email}\n",
                f"**To:** {to_email}\n",
                f"**Date:** {date_str}\n",
            ]
            if label_ids:
                parts.append(f"**Labels:** {', '.join(map(str, label_ids))}\n")
            parts.append("\n---\n\n")

            # Composio provides full message text in 'messageText';
            # fall back to the snippet when it is absent.
            message_text = message.get("messageText", "")
            snippet = message.get("snippet", "")
            if message_text:
                parts.append(f"## Content\n\n{message_text}\n\n")
            elif snippet:
                parts.append(f"## Preview\n\n{snippet}\n\n")

            # Add attachment info if present
            attachments = message.get("attachmentList") or []
            if attachments:
                parts.append("## Attachments\n\n")
                parts.extend(
                    f"- {att.get('filename', att.get('name', 'Unknown'))}\n"
                    for att in attachments
                )
                parts.append("\n")

            # Add message metadata
            parts.append("## Message Details\n\n")
            parts.append(f"- **Message ID:** {message_id}\n")

            return "".join(parts)

        except Exception as e:
            # Deliberate best-effort: callers expect a string back, so an
            # unusable message is reported in-band rather than raised.
            return f"Error formatting message to markdown: {e!s}"

    @staticmethod
    def _format_event_time(time_str: str) -> str:
        """Render an event timestamp as 'YYYY-MM-DD HH:MM'; pass dates through unchanged."""
        from datetime import datetime

        if not time_str:
            return "Unknown"
        try:
            if "T" in time_str:
                # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
                dt = datetime.fromisoformat(time_str.replace("Z", "+00:00"))
                return dt.strftime("%Y-%m-%d %H:%M")
        except (ValueError, TypeError, AttributeError):
            # Unparseable value: fall through and show it verbatim.
            pass
        return time_str

    def format_calendar_event_to_markdown(self, event: dict[str, Any]) -> str:
        """
        Format a Google Calendar event to markdown.

        Keys that are missing *or* explicitly null (start, end, attendees)
        are treated as empty, so a sparse event still renders instead of
        collapsing into the error string.

        Args:
            event: Event object from Google Calendar API.

        Returns:
            Formatted markdown string, or an "Error formatting event to
            markdown: ..." string if the event cannot be processed at all.
        """
        try:
            # Extract basic event information
            summary = event.get("summary", "No Title")
            description = event.get("description", "")
            location = event.get("location", "")

            # Timed events carry 'dateTime'; all-day events carry 'date'.
            start = event.get("start") or {}
            end = event.get("end") or {}
            start_formatted = self._format_event_time(
                start.get("dateTime") or start.get("date", "")
            )
            end_formatted = self._format_event_time(
                end.get("dateTime") or end.get("date", "")
            )

            # One bullet per attendee, using the email when no display name exists.
            attendee_list = []
            for attendee in event.get("attendees") or []:
                email = attendee.get("email", "")
                display_name = attendee.get("displayName", email)
                response_status = attendee.get("responseStatus", "")
                attendee_list.append(f"- {display_name} ({response_status})")

            parts = [
                f"# {summary}\n\n",
                f"**Start:** {start_formatted}\n",
                f"**End:** {end_formatted}\n",
            ]
            if location:
                parts.append(f"**Location:** {location}\n")
            parts.append("\n")

            if description:
                parts.append(f"## Description\n\n{description}\n\n")

            if attendee_list:
                parts.append("## Attendees\n\n")
                parts.append("\n".join(attendee_list))
                parts.append("\n\n")

            # Add event metadata
            parts.append("## Event Details\n\n")
            parts.append(f"- **Event ID:** {event.get('id', 'Unknown')}\n")
            parts.append(f"- **Created:** {event.get('created', 'Unknown')}\n")
            parts.append(f"- **Updated:** {event.get('updated', 'Unknown')}\n")

            return "".join(parts)

        except Exception as e:
            # Deliberate best-effort: callers expect a string back, so an
            # unusable event is reported in-band rather than raised.
            return f"Error formatting event to markdown: {e!s}"
|
||||
|
|
@ -55,6 +55,7 @@ class DocumentType(str, Enum):
|
|||
CIRCLEBACK = "CIRCLEBACK"
|
||||
OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR"
|
||||
NOTE = "NOTE"
|
||||
COMPOSIO_CONNECTOR = "COMPOSIO_CONNECTOR" # Generic Composio integration
|
||||
|
||||
|
||||
class SearchSourceConnectorType(str, Enum):
|
||||
|
|
@ -83,6 +84,7 @@ class SearchSourceConnectorType(str, Enum):
|
|||
CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR"
|
||||
OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR" # Self-hosted only - Local Obsidian vault indexing
|
||||
MCP_CONNECTOR = "MCP_CONNECTOR" # Model Context Protocol - User-defined API tools
|
||||
COMPOSIO_CONNECTOR = "COMPOSIO_CONNECTOR" # Generic Composio integration (Google, Slack, etc.)
|
||||
|
||||
|
||||
class LiteLLMProvider(str, Enum):
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from .airtable_add_connector_route import (
|
|||
from .chat_comments_routes import router as chat_comments_router
|
||||
from .circleback_webhook_route import router as circleback_webhook_router
|
||||
from .clickup_add_connector_route import router as clickup_add_connector_router
|
||||
from .composio_routes import router as composio_router
|
||||
from .confluence_add_connector_route import router as confluence_add_connector_router
|
||||
from .discord_add_connector_route import router as discord_add_connector_router
|
||||
from .documents_routes import router as documents_router
|
||||
|
|
@ -65,3 +66,4 @@ router.include_router(logs_router)
|
|||
router.include_router(circleback_webhook_router) # Circleback meeting webhooks
|
||||
router.include_router(surfsense_docs_router) # Surfsense documentation for citations
|
||||
router.include_router(notifications_router) # Notifications with Electric SQL sync
|
||||
router.include_router(composio_router) # Composio OAuth and toolkit management
|
||||
|
|
|
|||
333
surfsense_backend/app/routes/composio_routes.py
Normal file
333
surfsense_backend/app/routes/composio_routes.py
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
"""
|
||||
Composio Connector OAuth Routes.
|
||||
|
||||
Handles OAuth flow for Composio-based integrations (Google Drive, Gmail, Calendar, etc.).
|
||||
This provides a single connector that can connect to any Composio toolkit.
|
||||
|
||||
Endpoints:
|
||||
- GET /composio/toolkits - List available Composio toolkits
|
||||
- GET /auth/composio/connector/add - Initiate OAuth for a specific toolkit
|
||||
- GET /auth/composio/connector/callback - Handle OAuth callback
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from fastapi.responses import RedirectResponse
|
||||
from pydantic import ValidationError
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import config
|
||||
from app.db import (
|
||||
SearchSourceConnector,
|
||||
SearchSourceConnectorType,
|
||||
User,
|
||||
get_async_session,
|
||||
)
|
||||
from app.services.composio_service import (
|
||||
COMPOSIO_TOOLKIT_NAMES,
|
||||
INDEXABLE_TOOLKITS,
|
||||
ComposioService,
|
||||
)
|
||||
from app.users import current_active_user
|
||||
from app.utils.connector_naming import (
|
||||
check_duplicate_connector,
|
||||
generate_unique_connector_name,
|
||||
)
|
||||
from app.utils.oauth_security import OAuthStateManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Initialize security utilities
|
||||
_state_manager = None
|
||||
|
||||
|
||||
def get_state_manager() -> OAuthStateManager:
    """Return the module-wide OAuth state manager, building it on first use.

    Raises:
        ValueError: If ``config.SECRET_KEY`` is not set when the manager has
            to be created.
    """
    global _state_manager

    # Fast path: the manager was already created by an earlier call.
    if _state_manager is not None:
        return _state_manager

    secret_key = config.SECRET_KEY
    if not secret_key:
        raise ValueError("SECRET_KEY must be set for OAuth security")

    _state_manager = OAuthStateManager(secret_key)
    return _state_manager
|
||||
|
||||
|
||||
@router.get("/composio/toolkits")
async def list_composio_toolkits(user: User = Depends(current_active_user)):
    """
    List available Composio toolkits.

    Responds with 503 when the Composio integration is disabled and with 500
    when the toolkit lookup itself fails.

    Returns:
        JSON object whose "toolkits" key holds the available toolkits and
        their metadata.
    """
    composio_enabled = ComposioService.is_enabled()
    if not composio_enabled:
        raise HTTPException(
            status_code=503,
            detail="Composio integration is not enabled. Set COMPOSIO_ENABLED=TRUE and provide COMPOSIO_API_KEY.",
        )

    try:
        return {"toolkits": ComposioService().list_available_toolkits()}
    except Exception as e:
        logger.error(f"Failed to list Composio toolkits: {e!s}")
        raise HTTPException(
            status_code=500, detail=f"Failed to list toolkits: {e!s}"
        ) from e
|
||||
|
||||
|
||||
@router.get("/auth/composio/connector/add")
async def initiate_composio_auth(
    space_id: int,
    toolkit_id: str = Query(..., description="Composio toolkit ID (e.g., 'googledrive', 'gmail')"),
    user: User = Depends(current_active_user),
):
    """
    Initiate Composio OAuth flow for a specific toolkit.

    Query params:
        space_id: Search space ID to add connector to
        toolkit_id: Composio toolkit ID (e.g., "googledrive", "gmail", "googlecalendar")

    Returns:
        JSON with auth_url to redirect user to Composio authorization

    Raises:
        HTTPException: 503 when Composio is disabled, 400 for a missing
            space_id or unknown toolkit, 500 when SECRET_KEY is missing or
            the OAuth initiation fails.
    """
    from urllib.parse import urlencode

    if not ComposioService.is_enabled():
        raise HTTPException(
            status_code=503,
            detail="Composio integration is not enabled.",
        )

    if not space_id:
        raise HTTPException(status_code=400, detail="space_id is required")

    if toolkit_id not in COMPOSIO_TOOLKIT_NAMES:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown toolkit: {toolkit_id}. Available: {list(COMPOSIO_TOOLKIT_NAMES.keys())}",
        )

    if not config.SECRET_KEY:
        raise HTTPException(
            status_code=500, detail="SECRET_KEY not configured for OAuth security."
        )

    try:
        # Generate secure state parameter with HMAC signature
        state_manager = get_state_manager()
        state_encoded = state_manager.generate_secure_state(
            space_id, user.id, toolkit_id=toolkit_id
        )

        # Build callback URL
        callback_url = config.COMPOSIO_REDIRECT_URI
        if not callback_url:
            # Fallback: construct from BACKEND_URL
            backend_url = config.BACKEND_URL or "http://localhost:8000"
            callback_url = f"{backend_url}/api/v1/auth/composio/connector/callback"

        # Percent-encode the state and pick the right separator so the
        # redirect URI stays valid even when the state contains reserved
        # characters or the configured callback already has a query string.
        separator = "&" if "?" in callback_url else "?"
        redirect_uri = f"{callback_url}{separator}{urlencode({'state': state_encoded})}"

        # Initiate Composio OAuth
        service = ComposioService()
        # Use user.id as the entity ID in Composio (converted to string for Composio)
        entity_id = f"surfsense_{user.id}"

        connection_result = await service.initiate_connection(
            user_id=entity_id,
            toolkit_id=toolkit_id,
            redirect_uri=redirect_uri,
        )

        auth_url = connection_result.get("redirect_url")
        if not auth_url:
            raise HTTPException(
                status_code=500, detail="Failed to get authorization URL from Composio"
            )

        logger.info(
            f"Initiating Composio OAuth for user {user.id}, toolkit {toolkit_id}, space {space_id}"
        )
        return {"auth_url": auth_url}

    except HTTPException:
        # Already a well-formed HTTP error; do not wrap it in a 500.
        raise
    except Exception as e:
        logger.error(f"Failed to initiate Composio OAuth: {e!s}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to initiate Composio OAuth: {e!s}"
        ) from e
|
||||
|
||||
|
||||
def _connectors_modal_redirect(space_id: int, query: str) -> RedirectResponse:
    """Redirect to the connectors modal of a search space, appending ``query``."""
    return RedirectResponse(
        url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&{query}"
    )


@router.get("/auth/composio/connector/callback")
async def composio_callback(
    state: str | None = None,
    connectedAccountId: str | None = None,  # Composio sends camelCase
    connected_account_id: str | None = None,  # Fallback snake_case
    error: str | None = None,
    session: AsyncSession = Depends(get_async_session),
):
    """
    Handle Composio OAuth callback.

    Query params:
        state: Encoded state with space_id, user_id, and toolkit_id
        connectedAccountId / connected_account_id: Composio connected account
            ID (may not be present); the camelCase form wins when both are sent
        error: OAuth error (if user denied access or error occurred)

    Returns:
        Redirect to the frontend connectors modal (success, duplicate, or
        OAuth-denied variant).

    Raises:
        HTTPException: 400 for a missing/invalid state or missing toolkit_id,
            503 when Composio is disabled, 409 on a database integrity error,
            500 for any other failure.
    """
    try:
        # Handle OAuth errors
        if error:
            logger.warning(f"Composio OAuth error: {error}")
            space_id = None
            if state:
                # Best effort: the state is only needed to pick the redirect
                # target, so a bad state must not mask the OAuth error.
                try:
                    space_id = get_state_manager().validate_state(state).get("space_id")
                except Exception:
                    logger.warning("Failed to validate state in error handler")

            if space_id:
                return _connectors_modal_redirect(
                    space_id, "error=composio_oauth_denied"
                )
            return RedirectResponse(
                url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=composio_oauth_denied"
            )

        # Validate required parameters
        if not state:
            raise HTTPException(status_code=400, detail="Missing state parameter")

        # Validate and decode state with signature verification
        state_manager = get_state_manager()
        try:
            data = state_manager.validate_state(state)
        except HTTPException:
            raise
        except Exception as e:
            raise HTTPException(
                status_code=400, detail=f"Invalid state parameter: {e!s}"
            ) from e

        user_id = UUID(data["user_id"])
        space_id = data["space_id"]
        toolkit_id = data.get("toolkit_id")

        if not toolkit_id:
            raise HTTPException(status_code=400, detail="Missing toolkit_id in state")

        toolkit_name = COMPOSIO_TOOLKIT_NAMES.get(toolkit_id, toolkit_id)

        logger.info(
            f"Processing Composio callback for user {user_id}, toolkit {toolkit_id}, space {space_id}"
        )

        # Same availability guard as the other Composio endpoints; no service
        # instance is needed here because the callback makes no Composio calls.
        if not ComposioService.is_enabled():
            raise HTTPException(
                status_code=503,
                detail="Composio integration is not enabled.",
            )

        # Use camelCase param if provided (Composio's format), fallback to snake_case
        final_connected_account_id = connectedAccountId or connected_account_id

        logger.debug(
            "Composio callback received - connectedAccountId: %s, connected_account_id: %s, using: %s",
            connectedAccountId,
            connected_account_id,
            final_connected_account_id,
        )

        # If we still don't have a connected_account_id, warn but continue
        # (the connector will be created but indexing won't work until updated)
        if not final_connected_account_id:
            logger.warning(
                f"Could not find connected_account_id for toolkit {toolkit_id}. "
                "The connector will be created but indexing may not work."
            )
        else:
            logger.info(f"Successfully got connected_account_id: {final_connected_account_id}")

        is_indexable = toolkit_id in INDEXABLE_TOOLKITS

        # Build connector config
        connector_config = {
            "composio_connected_account_id": final_connected_account_id,
            "toolkit_id": toolkit_id,
            "toolkit_name": toolkit_name,
            "is_indexable": is_indexable,
        }

        # Check for duplicate connector
        # For Composio, the connected account ID is the unique identifier,
        # falling back to toolkit_id + user_id when Composio did not send one
        identifier = final_connected_account_id or f"{toolkit_id}_{user_id}"

        is_duplicate = await check_duplicate_connector(
            session,
            SearchSourceConnectorType.COMPOSIO_CONNECTOR,
            space_id,
            user_id,
            identifier,
        )
        if is_duplicate:
            logger.warning(
                f"Duplicate Composio connector detected for user {user_id} with toolkit {toolkit_id}"
            )
            return _connectors_modal_redirect(
                space_id, "error=duplicate_account&connector=composio-connector"
            )

        try:
            # Generate a unique, user-friendly connector name
            connector_name = await generate_unique_connector_name(
                session,
                SearchSourceConnectorType.COMPOSIO_CONNECTOR,
                space_id,
                user_id,
                f"{toolkit_name} (Composio)",
            )

            db_connector = SearchSourceConnector(
                name=connector_name,
                connector_type=SearchSourceConnectorType.COMPOSIO_CONNECTOR,
                config=connector_config,
                search_space_id=space_id,
                user_id=user_id,
                is_indexable=is_indexable,
            )

            session.add(db_connector)
            await session.commit()
            # Refresh so the database-assigned ID is available for the redirect.
            await session.refresh(db_connector)

            logger.info(
                f"Successfully created Composio connector {db_connector.id} for user {user_id}, toolkit {toolkit_id}"
            )

            return _connectors_modal_redirect(
                space_id,
                f"success=true&connector=composio-connector&connectorId={db_connector.id}",
            )

        except IntegrityError as e:
            await session.rollback()
            logger.error(f"Database integrity error: {e!s}")
            raise HTTPException(
                status_code=409,
                detail=f"Database integrity error: {e!s}",
            ) from e
        except ValidationError as e:
            await session.rollback()
            logger.error(f"Validation error: {e!s}")
            raise HTTPException(
                status_code=400, detail=f"Invalid connector configuration: {e!s}"
            ) from e

    except HTTPException:
        # Already a well-formed HTTP error; do not wrap it in a 500.
        raise
    except Exception as e:
        logger.error(f"Unexpected error in Composio callback: {e!s}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to complete Composio OAuth: {e!s}"
        ) from e
|
||||
|
|
@ -887,6 +887,19 @@ async def index_connector_content(
|
|||
)
|
||||
response_message = "Obsidian vault indexing started in the background."
|
||||
|
||||
elif connector.connector_type == SearchSourceConnectorType.COMPOSIO_CONNECTOR:
|
||||
from app.tasks.celery_tasks.connector_tasks import (
|
||||
index_composio_connector_task,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Triggering Composio connector indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
|
||||
)
|
||||
index_composio_connector_task.delay(
|
||||
connector_id, search_space_id, str(user.id), indexing_from, indexing_to
|
||||
)
|
||||
response_message = "Composio connector indexing started in the background."
|
||||
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
|
|
|
|||
607
surfsense_backend/app/services/composio_service.py
Normal file
607
surfsense_backend/app/services/composio_service.py
Normal file
|
|
@ -0,0 +1,607 @@
|
|||
"""
|
||||
Composio Service Module.
|
||||
|
||||
Provides a wrapper around the Composio SDK for managing OAuth connections
|
||||
and executing tools for various integrations (Google Drive, Gmail, Calendar, etc.).
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from composio import Composio
|
||||
|
||||
from app.config import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Toolkit ID -> Composio auth config ID.
# Every toolkit relies on Composio's managed OAuth (no custom credentials needed),
# hence the shared "default" value.
COMPOSIO_TOOLKIT_AUTH_CONFIGS = dict.fromkeys(
    (
        "googledrive",
        "gmail",
        "googlecalendar",
        "slack",
        "notion",
        "github",
    ),
    "default",
)

# Toolkit ID -> human-readable display name.
COMPOSIO_TOOLKIT_NAMES = dict(
    googledrive="Google Drive",
    gmail="Gmail",
    googlecalendar="Google Calendar",
    slack="Slack",
    notion="Notion",
    github="GitHub",
)

# Toolkits whose content can be indexed (Phase 1: Google services only).
INDEXABLE_TOOLKITS = set(("googledrive", "gmail", "googlecalendar"))
|
||||
|
||||
|
||||
class ComposioService:
|
||||
"""Service for interacting with Composio API."""
|
||||
|
||||
def __init__(self, api_key: str | None = None):
|
||||
"""
|
||||
Initialize the Composio service.
|
||||
|
||||
Args:
|
||||
api_key: Composio API key. If not provided, uses config.COMPOSIO_API_KEY.
|
||||
"""
|
||||
self.api_key = api_key or config.COMPOSIO_API_KEY
|
||||
if not self.api_key:
|
||||
raise ValueError("COMPOSIO_API_KEY is required but not configured")
|
||||
self.client = Composio(api_key=self.api_key)
|
||||
|
||||
@staticmethod
|
||||
def is_enabled() -> bool:
|
||||
"""Check if Composio integration is enabled."""
|
||||
return config.COMPOSIO_ENABLED and bool(config.COMPOSIO_API_KEY)
|
||||
|
||||
def list_available_toolkits(self) -> list[dict[str, Any]]:
|
||||
"""
|
||||
List all available Composio toolkits for the UI.
|
||||
|
||||
Returns:
|
||||
List of toolkit metadata dictionaries.
|
||||
"""
|
||||
toolkits = []
|
||||
for toolkit_id, display_name in COMPOSIO_TOOLKIT_NAMES.items():
|
||||
toolkits.append(
|
||||
{
|
||||
"id": toolkit_id,
|
||||
"name": display_name,
|
||||
"is_indexable": toolkit_id in INDEXABLE_TOOLKITS,
|
||||
"description": f"Connect to {display_name} via Composio",
|
||||
}
|
||||
)
|
||||
return toolkits
|
||||
|
||||
def _get_auth_config_for_toolkit(self, toolkit_id: str) -> str | None:
|
||||
"""
|
||||
Get the auth_config_id for a specific toolkit.
|
||||
|
||||
Args:
|
||||
toolkit_id: The toolkit ID (e.g., "googledrive", "gmail").
|
||||
|
||||
Returns:
|
||||
The auth_config_id or None if not found.
|
||||
"""
|
||||
try:
|
||||
# List all auth configs and find the one matching our toolkit
|
||||
auth_configs = self.client.auth_configs.list()
|
||||
for auth_config in auth_configs.items:
|
||||
# Get toolkit - it may be an object with a 'slug' or 'name' attribute, or a string
|
||||
config_toolkit = getattr(auth_config, "toolkit", None)
|
||||
if config_toolkit is None:
|
||||
continue
|
||||
|
||||
# Extract toolkit name/slug from the object
|
||||
toolkit_name = None
|
||||
if isinstance(config_toolkit, str):
|
||||
toolkit_name = config_toolkit
|
||||
elif hasattr(config_toolkit, "slug"):
|
||||
toolkit_name = config_toolkit.slug
|
||||
elif hasattr(config_toolkit, "name"):
|
||||
toolkit_name = config_toolkit.name
|
||||
elif hasattr(config_toolkit, "id"):
|
||||
toolkit_name = config_toolkit.id
|
||||
|
||||
# Compare case-insensitively
|
||||
if toolkit_name and toolkit_name.lower() == toolkit_id.lower():
|
||||
logger.info(f"Found auth config {auth_config.id} for toolkit {toolkit_id}")
|
||||
return auth_config.id
|
||||
|
||||
# Log available auth configs for debugging
|
||||
logger.warning(f"No auth config found for toolkit '{toolkit_id}'. Available auth configs:")
|
||||
for auth_config in auth_configs.items:
|
||||
config_toolkit = getattr(auth_config, "toolkit", None)
|
||||
logger.warning(f" - {auth_config.id}: toolkit={config_toolkit}")
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list auth configs: {e!s}")
|
||||
return None
|
||||
|
||||
async def initiate_connection(
|
||||
self,
|
||||
user_id: str,
|
||||
toolkit_id: str,
|
||||
redirect_uri: str,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Initiate OAuth flow for a Composio toolkit.
|
||||
|
||||
Args:
|
||||
user_id: Unique identifier for the user (used as entity_id in Composio).
|
||||
toolkit_id: The toolkit to connect (e.g., "googledrive", "gmail").
|
||||
redirect_uri: URL to redirect after OAuth completion.
|
||||
|
||||
Returns:
|
||||
Dictionary containing redirect_url and connection_id.
|
||||
"""
|
||||
if toolkit_id not in COMPOSIO_TOOLKIT_NAMES:
|
||||
raise ValueError(f"Unknown toolkit: {toolkit_id}")
|
||||
|
||||
try:
|
||||
# First, get the auth_config_id for this toolkit
|
||||
auth_config_id = self._get_auth_config_for_toolkit(toolkit_id)
|
||||
|
||||
if not auth_config_id:
|
||||
raise ValueError(
|
||||
f"No auth config found for toolkit '{toolkit_id}'. "
|
||||
f"Please create an auth config for {COMPOSIO_TOOLKIT_NAMES.get(toolkit_id, toolkit_id)} "
|
||||
f"in your Composio dashboard at https://app.composio.dev"
|
||||
)
|
||||
|
||||
# Initiate the connection using Composio SDK with auth_config_id
|
||||
# allow_multiple=True allows creating multiple connections per user (e.g., different Google accounts)
|
||||
connection_request = self.client.connected_accounts.initiate(
|
||||
user_id=user_id,
|
||||
auth_config_id=auth_config_id,
|
||||
callback_url=redirect_uri,
|
||||
allow_multiple=True,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Initiated Composio connection for user {user_id}, toolkit {toolkit_id}, auth_config {auth_config_id}"
|
||||
)
|
||||
|
||||
return {
|
||||
"redirect_url": connection_request.redirect_url,
|
||||
"connection_id": getattr(connection_request, "id", None),
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initiate Composio connection: {e!s}")
|
||||
raise
|
||||
|
||||
async def get_connected_account(
|
||||
self, connected_account_id: str
|
||||
) -> dict[str, Any] | None:
|
||||
"""
|
||||
Get details of a connected account.
|
||||
|
||||
Args:
|
||||
connected_account_id: The Composio connected account ID.
|
||||
|
||||
Returns:
|
||||
Connected account details or None if not found.
|
||||
"""
|
||||
try:
|
||||
# Pass connected_account_id as positional argument (not keyword)
|
||||
account = self.client.connected_accounts.get(connected_account_id)
|
||||
return {
|
||||
"id": account.id,
|
||||
"status": getattr(account, "status", None),
|
||||
"toolkit": getattr(account, "toolkit", None),
|
||||
"user_id": getattr(account, "user_id", None),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get connected account {connected_account_id}: {e!s}")
|
||||
return None
|
||||
|
||||
async def list_all_connections(self) -> list[dict[str, Any]]:
|
||||
"""
|
||||
List ALL connected accounts (for debugging).
|
||||
|
||||
Returns:
|
||||
List of all connected account details.
|
||||
"""
|
||||
try:
|
||||
accounts_response = self.client.connected_accounts.list()
|
||||
|
||||
if hasattr(accounts_response, "items"):
|
||||
accounts = accounts_response.items
|
||||
elif hasattr(accounts_response, "__iter__"):
|
||||
accounts = accounts_response
|
||||
else:
|
||||
logger.warning(f"Unexpected accounts response type: {type(accounts_response)}")
|
||||
return []
|
||||
|
||||
result = []
|
||||
for acc in accounts:
|
||||
toolkit_raw = getattr(acc, "toolkit", None)
|
||||
toolkit_info = None
|
||||
if toolkit_raw:
|
||||
if isinstance(toolkit_raw, str):
|
||||
toolkit_info = toolkit_raw
|
||||
elif hasattr(toolkit_raw, "slug"):
|
||||
toolkit_info = toolkit_raw.slug
|
||||
elif hasattr(toolkit_raw, "name"):
|
||||
toolkit_info = toolkit_raw.name
|
||||
else:
|
||||
toolkit_info = str(toolkit_raw)
|
||||
|
||||
result.append({
|
||||
"id": acc.id,
|
||||
"status": getattr(acc, "status", None),
|
||||
"toolkit": toolkit_info,
|
||||
"user_id": getattr(acc, "user_id", None),
|
||||
})
|
||||
|
||||
logger.info(f"DEBUG: Found {len(result)} TOTAL connections in Composio")
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list all connections: {e!s}")
|
||||
return []
|
||||
|
||||
async def list_user_connections(self, user_id: str) -> list[dict[str, Any]]:
|
||||
"""
|
||||
List all connected accounts for a user.
|
||||
|
||||
Args:
|
||||
user_id: The user's unique identifier.
|
||||
|
||||
Returns:
|
||||
List of connected account details.
|
||||
"""
|
||||
try:
|
||||
logger.info(f"DEBUG: Calling connected_accounts.list(user_id='{user_id}')")
|
||||
accounts_response = self.client.connected_accounts.list(user_id=user_id)
|
||||
|
||||
# Handle paginated response (may have .items attribute) or direct list
|
||||
if hasattr(accounts_response, "items"):
|
||||
accounts = accounts_response.items
|
||||
elif hasattr(accounts_response, "__iter__"):
|
||||
accounts = accounts_response
|
||||
else:
|
||||
logger.warning(f"Unexpected accounts response type: {type(accounts_response)}")
|
||||
return []
|
||||
|
||||
result = []
|
||||
for acc in accounts:
|
||||
# Extract toolkit info - might be string or object
|
||||
toolkit_raw = getattr(acc, "toolkit", None)
|
||||
toolkit_info = None
|
||||
if toolkit_raw:
|
||||
if isinstance(toolkit_raw, str):
|
||||
toolkit_info = toolkit_raw
|
||||
elif hasattr(toolkit_raw, "slug"):
|
||||
toolkit_info = toolkit_raw.slug
|
||||
elif hasattr(toolkit_raw, "name"):
|
||||
toolkit_info = toolkit_raw.name
|
||||
else:
|
||||
toolkit_info = toolkit_raw
|
||||
|
||||
result.append({
|
||||
"id": acc.id,
|
||||
"status": getattr(acc, "status", None),
|
||||
"toolkit": toolkit_info,
|
||||
})
|
||||
|
||||
logger.info(f"Found {len(result)} connections for user {user_id}: {result}")
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list connections for user {user_id}: {e!s}")
|
||||
return []
|
||||
|
||||
async def execute_tool(
|
||||
self,
|
||||
connected_account_id: str,
|
||||
tool_name: str,
|
||||
params: dict[str, Any] | None = None,
|
||||
entity_id: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Execute a Composio tool.
|
||||
|
||||
Args:
|
||||
connected_account_id: The connected account to use.
|
||||
tool_name: Name of the tool (e.g., "GOOGLEDRIVE_LIST_FILES").
|
||||
params: Parameters for the tool.
|
||||
entity_id: The entity/user ID that owns the connected account.
|
||||
|
||||
Returns:
|
||||
Tool execution result.
|
||||
"""
|
||||
try:
|
||||
# Based on Composio SDK docs:
|
||||
# - slug: tool name
|
||||
# - arguments: tool parameters
|
||||
# - connected_account_id: for authentication
|
||||
# - user_id: user identifier (SDK uses user_id, not entity_id)
|
||||
# - dangerously_skip_version_check: skip version check for manual execution
|
||||
logger.info(f"DEBUG: Executing tool {tool_name} with params: {params}")
|
||||
result = self.client.tools.execute(
|
||||
slug=tool_name,
|
||||
connected_account_id=connected_account_id,
|
||||
user_id=entity_id, # SDK expects user_id parameter
|
||||
arguments=params or {},
|
||||
dangerously_skip_version_check=True,
|
||||
)
|
||||
logger.info(f"DEBUG: Tool {tool_name} raw result type: {type(result)}")
|
||||
logger.info(f"DEBUG: Tool {tool_name} raw result: {result}")
|
||||
return {"success": True, "data": result}
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to execute tool {tool_name}: {e!s}")
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
# ===== Google Drive specific methods =====
|
||||
|
||||
async def get_drive_files(
|
||||
self,
|
||||
connected_account_id: str,
|
||||
entity_id: str,
|
||||
folder_id: str | None = None,
|
||||
page_token: str | None = None,
|
||||
page_size: int = 100,
|
||||
) -> tuple[list[dict[str, Any]], str | None, str | None]:
|
||||
"""
|
||||
List files from Google Drive via Composio.
|
||||
|
||||
Args:
|
||||
connected_account_id: Composio connected account ID.
|
||||
entity_id: The entity/user ID that owns the connected account.
|
||||
folder_id: Optional folder ID to list contents of.
|
||||
page_token: Pagination token.
|
||||
page_size: Number of files per page.
|
||||
|
||||
Returns:
|
||||
Tuple of (files list, next_page_token, error message).
|
||||
"""
|
||||
try:
|
||||
# Composio uses snake_case for parameters
|
||||
params = {
|
||||
"page_size": min(page_size, 100),
|
||||
}
|
||||
if folder_id:
|
||||
params["folder_id"] = folder_id
|
||||
if page_token:
|
||||
params["page_token"] = page_token
|
||||
|
||||
result = await self.execute_tool(
|
||||
connected_account_id=connected_account_id,
|
||||
tool_name="GOOGLEDRIVE_LIST_FILES",
|
||||
params=params,
|
||||
entity_id=entity_id,
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
return [], None, result.get("error", "Unknown error")
|
||||
|
||||
data = result.get("data", {})
|
||||
logger.info(f"DEBUG: Drive data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}")
|
||||
|
||||
# Handle nested response structure from Composio
|
||||
files = []
|
||||
next_token = None
|
||||
if isinstance(data, dict):
|
||||
# Try direct access first, then nested
|
||||
files = data.get("files", []) or data.get("data", {}).get("files", [])
|
||||
next_token = data.get("nextPageToken") or data.get("next_page_token") or data.get("data", {}).get("nextPageToken")
|
||||
elif isinstance(data, list):
|
||||
files = data
|
||||
|
||||
logger.info(f"DEBUG: Extracted {len(files)} drive files")
|
||||
return files, next_token, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list Drive files: {e!s}")
|
||||
return [], None, str(e)
|
||||
|
||||
async def get_drive_file_content(
|
||||
self, connected_account_id: str, entity_id: str, file_id: str
|
||||
) -> tuple[bytes | None, str | None]:
|
||||
"""
|
||||
Download file content from Google Drive via Composio.
|
||||
|
||||
Args:
|
||||
connected_account_id: Composio connected account ID.
|
||||
entity_id: The entity/user ID that owns the connected account.
|
||||
file_id: Google Drive file ID.
|
||||
|
||||
Returns:
|
||||
Tuple of (file content bytes, error message).
|
||||
"""
|
||||
try:
|
||||
result = await self.execute_tool(
|
||||
connected_account_id=connected_account_id,
|
||||
tool_name="GOOGLEDRIVE_DOWNLOAD_FILE",
|
||||
params={"file_id": file_id}, # snake_case
|
||||
entity_id=entity_id,
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
return None, result.get("error", "Unknown error")
|
||||
|
||||
content = result.get("data")
|
||||
if isinstance(content, str):
|
||||
content = content.encode("utf-8")
|
||||
|
||||
return content, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get Drive file content: {e!s}")
|
||||
return None, str(e)
|
||||
|
||||
# ===== Gmail specific methods =====
|
||||
|
||||
async def get_gmail_messages(
|
||||
self,
|
||||
connected_account_id: str,
|
||||
entity_id: str,
|
||||
query: str = "",
|
||||
max_results: int = 100,
|
||||
) -> tuple[list[dict[str, Any]], str | None]:
|
||||
"""
|
||||
List Gmail messages via Composio.
|
||||
|
||||
Args:
|
||||
connected_account_id: Composio connected account ID.
|
||||
entity_id: The entity/user ID that owns the connected account.
|
||||
query: Gmail search query.
|
||||
max_results: Maximum number of messages to return.
|
||||
|
||||
Returns:
|
||||
Tuple of (messages list, error message).
|
||||
"""
|
||||
try:
|
||||
# Composio uses snake_case for parameters, max is 500
|
||||
params = {"max_results": min(max_results, 500)}
|
||||
if query:
|
||||
params["query"] = query # Composio uses 'query' not 'q'
|
||||
|
||||
result = await self.execute_tool(
|
||||
connected_account_id=connected_account_id,
|
||||
tool_name="GMAIL_FETCH_EMAILS",
|
||||
params=params,
|
||||
entity_id=entity_id,
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
return [], result.get("error", "Unknown error")
|
||||
|
||||
data = result.get("data", {})
|
||||
logger.info(f"DEBUG: Gmail data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}")
|
||||
logger.info(f"DEBUG: Gmail full data: {data}")
|
||||
|
||||
# Try different possible response structures
|
||||
messages = []
|
||||
if isinstance(data, dict):
|
||||
messages = data.get("messages", []) or data.get("data", {}).get("messages", []) or data.get("emails", [])
|
||||
elif isinstance(data, list):
|
||||
messages = data
|
||||
|
||||
logger.info(f"DEBUG: Extracted {len(messages)} messages")
|
||||
return messages, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list Gmail messages: {e!s}")
|
||||
return [], str(e)
|
||||
|
||||
async def get_gmail_message_detail(
|
||||
self, connected_account_id: str, entity_id: str, message_id: str
|
||||
) -> tuple[dict[str, Any] | None, str | None]:
|
||||
"""
|
||||
Get full details of a Gmail message via Composio.
|
||||
|
||||
Args:
|
||||
connected_account_id: Composio connected account ID.
|
||||
entity_id: The entity/user ID that owns the connected account.
|
||||
message_id: Gmail message ID.
|
||||
|
||||
Returns:
|
||||
Tuple of (message details, error message).
|
||||
"""
|
||||
try:
|
||||
result = await self.execute_tool(
|
||||
connected_account_id=connected_account_id,
|
||||
tool_name="GMAIL_GET_MESSAGE_BY_MESSAGE_ID",
|
||||
params={"message_id": message_id}, # snake_case
|
||||
entity_id=entity_id,
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
return None, result.get("error", "Unknown error")
|
||||
|
||||
return result.get("data"), None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get Gmail message detail: {e!s}")
|
||||
return None, str(e)
|
||||
|
||||
# ===== Google Calendar specific methods =====
|
||||
|
||||
async def get_calendar_events(
|
||||
self,
|
||||
connected_account_id: str,
|
||||
entity_id: str,
|
||||
time_min: str | None = None,
|
||||
time_max: str | None = None,
|
||||
max_results: int = 250,
|
||||
) -> tuple[list[dict[str, Any]], str | None]:
|
||||
"""
|
||||
List Google Calendar events via Composio.
|
||||
|
||||
Args:
|
||||
connected_account_id: Composio connected account ID.
|
||||
entity_id: The entity/user ID that owns the connected account.
|
||||
time_min: Start time (RFC3339 format).
|
||||
time_max: End time (RFC3339 format).
|
||||
max_results: Maximum number of events.
|
||||
|
||||
Returns:
|
||||
Tuple of (events list, error message).
|
||||
"""
|
||||
try:
|
||||
# Composio uses snake_case for parameters
|
||||
params = {
|
||||
"max_results": min(max_results, 250),
|
||||
"single_events": True,
|
||||
"order_by": "startTime",
|
||||
}
|
||||
if time_min:
|
||||
params["time_min"] = time_min
|
||||
if time_max:
|
||||
params["time_max"] = time_max
|
||||
|
||||
result = await self.execute_tool(
|
||||
connected_account_id=connected_account_id,
|
||||
tool_name="GOOGLECALENDAR_EVENTS_LIST",
|
||||
params=params,
|
||||
entity_id=entity_id,
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
return [], result.get("error", "Unknown error")
|
||||
|
||||
data = result.get("data", {})
|
||||
logger.info(f"DEBUG: Calendar data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}")
|
||||
logger.info(f"DEBUG: Calendar full data: {data}")
|
||||
|
||||
# Try different possible response structures
|
||||
events = []
|
||||
if isinstance(data, dict):
|
||||
events = data.get("items", []) or data.get("data", {}).get("items", []) or data.get("events", [])
|
||||
elif isinstance(data, list):
|
||||
events = data
|
||||
|
||||
logger.info(f"DEBUG: Extracted {len(events)} calendar events")
|
||||
return events, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list Calendar events: {e!s}")
|
||||
return [], str(e)
|
||||
|
||||
|
||||
# Module-wide cached service instance; created on first request.
_composio_service: ComposioService | None = None


def get_composio_service() -> ComposioService:
    """
    Return the shared ComposioService, building it on the first call.

    Returns:
        The cached ComposioService instance.

    Raises:
        ValueError: If Composio is not properly configured (raised by the
            ComposioService constructor).
    """
    global _composio_service
    service = _composio_service
    if service is None:
        service = _composio_service = ComposioService()
    return service
|
||||
|
|
@ -802,3 +802,45 @@ async def _index_obsidian_vault(
|
|||
await run_obsidian_indexing(
|
||||
session, connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
|
||||
|
||||
@celery_app.task(name="index_composio_connector", bind=True)
def index_composio_connector_task(
    self,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """Celery entry point that indexes a Composio connector (Google Drive, Gmail, Calendar via Composio).

    Runs the async indexing coroutine to completion on a dedicated event
    loop, which is closed afterwards whether or not indexing succeeded.
    """
    import asyncio

    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)

    try:
        indexing_run = _index_composio_connector(
            connector_id, search_space_id, user_id, start_date, end_date
        )
        event_loop.run_until_complete(indexing_run)
    finally:
        event_loop.close()
|
||||
|
||||
|
||||
async def _index_composio_connector(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """Open a fresh Celery database session and run Composio indexing inside it."""
    # Imported here from the tasks folder (not connector_indexers) to avoid a circular import.
    from app.tasks.composio_indexer import index_composio_connector

    session_maker = get_celery_session_maker()
    async with session_maker() as db_session:
        await index_composio_connector(
            db_session,
            connector_id,
            search_space_id,
            user_id,
            start_date,
            end_date,
        )
|
||||
|
|
|
|||
878
surfsense_backend/app/tasks/composio_indexer.py
Normal file
878
surfsense_backend/app/tasks/composio_indexer.py
Normal file
|
|
@ -0,0 +1,878 @@
|
|||
"""
|
||||
Composio connector indexer.
|
||||
|
||||
Routes indexing requests to toolkit-specific handlers (Google Drive, Gmail, Calendar).
|
||||
|
||||
Note: This module is intentionally placed in app/tasks/ (not in connector_indexers/)
|
||||
to avoid circular import issues with the connector_indexers package.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.config import config
|
||||
from app.connectors.composio_connector import ComposioConnector
|
||||
from app.db import (
|
||||
Document,
|
||||
DocumentType,
|
||||
SearchSourceConnector,
|
||||
SearchSourceConnectorType,
|
||||
)
|
||||
from app.services.composio_service import INDEXABLE_TOOLKITS
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.utils.document_converters import (
|
||||
create_document_chunks,
|
||||
generate_content_hash,
|
||||
generate_document_summary,
|
||||
generate_unique_identifier_hash,
|
||||
)
|
||||
|
||||
# Set up logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============ Utility functions (copied from connector_indexers.base to avoid circular imports) ============
|
||||
|
||||
|
||||
def get_current_timestamp() -> datetime:
|
||||
"""Get the current timestamp with timezone for updated_at field."""
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
async def check_document_by_unique_identifier(
    session: AsyncSession, unique_identifier_hash: str
) -> Document | None:
    """Look up a document by its unique identifier hash.

    The query also loads the document's chunks via ``selectinload``.

    Returns:
        The first matching Document, or None when there is no match.
    """
    query = (
        select(Document)
        .options(selectinload(Document.chunks))
        .where(Document.unique_identifier_hash == unique_identifier_hash)
    )
    rows = await session.execute(query)
    return rows.scalars().first()
|
||||
|
||||
|
||||
async def get_connector_by_id(
    session: AsyncSession, connector_id: int, connector_type: SearchSourceConnectorType
) -> SearchSourceConnector | None:
    """Fetch the connector that has both the given ID and the given type.

    Returns:
        The first matching SearchSourceConnector, or None when there is no match.
    """
    query = select(SearchSourceConnector).filter(
        SearchSourceConnector.id == connector_id,
        SearchSourceConnector.connector_type == connector_type,
    )
    rows = await session.execute(query)
    return rows.scalars().first()
|
||||
|
||||
|
||||
async def update_connector_last_indexed(
    session: AsyncSession,
    connector: SearchSourceConnector,
    update_last_indexed: bool = True,
) -> None:
    """Stamp ``connector.last_indexed_at`` with the current time.

    Does nothing when ``update_last_indexed`` is false. The attribute is only
    assigned here; this function does not commit or flush the session.
    """
    if not update_last_indexed:
        return

    connector.last_indexed_at = datetime.now()
    logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
|
||||
|
||||
|
||||
# ============ Main indexer function ============
|
||||
|
||||
|
||||
async def index_composio_connector(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str | None = None,
    end_date: str | None = None,
    update_last_indexed: bool = True,
    max_items: int = 1000,
) -> tuple[int, str]:
    """
    Index content from a Composio connector.

    Routes to toolkit-specific indexing based on the connector's toolkit_id.

    Args:
        session: Database session
        connector_id: ID of the Composio connector
        search_space_id: ID of the search space
        user_id: ID of the user
        start_date: Start date for filtering (YYYY-MM-DD format)
        end_date: End date for filtering (YYYY-MM-DD format)
        update_last_indexed: Whether to update the last_indexed_at timestamp
        max_items: Maximum number of items to fetch

    Returns:
        Tuple of (number_of_indexed_items, message). On the failure paths in
        this function the count is 0 and the message describes the error;
        otherwise the toolkit-specific indexer's result is returned as-is.
    """
    task_logger = TaskLoggingService(session, search_space_id)

    # Log task start
    log_entry = await task_logger.log_task_start(
        task_name="composio_connector_indexing",
        source="connector_indexing_task",
        message=f"Starting Composio connector indexing for connector {connector_id}",
        metadata={
            "connector_id": connector_id,
            "user_id": str(user_id),
            "max_items": max_items,
            "start_date": start_date,
            "end_date": end_date,
        },
    )

    async def _fail(error_msg: str, details: str, error_type: str) -> tuple[int, str]:
        """Record a task failure and build the matching (0, message) result."""
        # Same positional layout as the exception handlers below:
        # message, details string, then the metadata dict.
        await task_logger.log_task_failure(
            log_entry, error_msg, details, {"error_type": error_type}
        )
        return 0, error_msg

    try:
        # Get connector by id
        connector = await get_connector_by_id(
            session, connector_id, SearchSourceConnectorType.COMPOSIO_CONNECTOR
        )
        if not connector:
            return await _fail(
                f"Composio connector with ID {connector_id} not found",
                "Connector not found",
                "ConnectorNotFound",
            )

        # Get toolkit ID from config; an absent config counts as "no toolkit"
        toolkit_id = (connector.config or {}).get("toolkit_id")
        if not toolkit_id:
            return await _fail(
                f"Composio connector {connector_id} has no toolkit_id configured",
                "Missing toolkit_id in connector config",
                "MissingToolkitId",
            )

        # Check if toolkit is indexable
        if toolkit_id not in INDEXABLE_TOOLKITS:
            return await _fail(
                f"Toolkit '{toolkit_id}' does not support indexing yet",
                "Toolkit is not indexable",
                "ToolkitNotIndexable",
            )

        # Arguments every toolkit-specific indexer receives
        common_kwargs = {
            "session": session,
            "connector": connector,
            "connector_id": connector_id,
            "search_space_id": search_space_id,
            "user_id": user_id,
            "task_logger": task_logger,
            "log_entry": log_entry,
            "update_last_indexed": update_last_indexed,
            "max_items": max_items,
        }

        # Route to toolkit-specific indexer (the Drive indexer takes no date range)
        if toolkit_id == "googledrive":
            return await _index_composio_google_drive(**common_kwargs)
        if toolkit_id == "gmail":
            return await _index_composio_gmail(
                **common_kwargs, start_date=start_date, end_date=end_date
            )
        if toolkit_id == "googlecalendar":
            return await _index_composio_google_calendar(
                **common_kwargs, start_date=start_date, end_date=end_date
            )

        # Listed as indexable but no handler is wired up above
        return await _fail(
            f"No indexer implemented for toolkit: {toolkit_id}",
            "No indexer implemented",
            "NoIndexerImplemented",
        )

    except SQLAlchemyError as db_error:
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Database error during Composio indexing for connector {connector_id}",
            str(db_error),
            {"error_type": "SQLAlchemyError"},
        )
        logger.error(f"Database error: {db_error!s}", exc_info=True)
        return 0, f"Database error: {db_error!s}"
    except Exception as e:
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Failed to index Composio connector {connector_id}",
            str(e),
            {"error_type": type(e).__name__},
        )
        logger.error(f"Failed to index Composio connector: {e!s}", exc_info=True)
        return 0, f"Failed to index Composio connector: {e!s}"
|
||||
|
||||
|
||||
async def _index_composio_google_drive(
    session: AsyncSession,
    connector,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    task_logger: TaskLoggingService,
    log_entry,
    update_last_indexed: bool = True,
    max_items: int = 1000,
) -> tuple[int, str | None]:
    """Index Google Drive files via Composio.

    Pages through the connector's Drive listing (at most ``max_items`` files are
    requested), downloads each non-folder file, and creates or updates one
    ``Document`` per file. Files whose content hash is unchanged are skipped.

    Args:
        session: Async database session used for lookups, inserts and commits.
        connector: Connector object forwarded to ``update_connector_last_indexed``.
        connector_id: ID used to build the ``ComposioConnector`` and stored in
            each document's metadata.
        search_space_id: Search space the documents belong to.
        user_id: User whose long-context LLM (if any) is used for summaries.
        task_logger: Service used to record progress, success and failure.
        log_entry: Task log entry passed back to ``task_logger``.
        update_last_indexed: Forwarded to ``update_connector_last_indexed``.
        max_items: Upper bound on the number of files requested from Drive.

    Returns:
        ``(documents_indexed, message)``. ``message`` is ``None`` after a normal
        run, an informational string when no files were found, and an error
        description (with a count of 0) when indexing failed.
    """
    try:
        composio_connector = ComposioConnector(session, connector_id)

        await task_logger.log_task_progress(
            log_entry,
            f"Fetching Google Drive files via Composio for connector {connector_id}",
            {"stage": "fetching_files"},
        )

        # Fetch files page by page until the cap is reached or pages run out.
        all_files = []
        page_token = None

        while len(all_files) < max_items:
            files, next_token, error = await composio_connector.list_drive_files(
                page_token=page_token,
                page_size=min(100, max_items - len(all_files)),
            )

            if error:
                error_msg = f"Failed to fetch Drive files: {error}"
                # Detail string third, metadata dict fourth: the same argument
                # layout the dispatcher's exception handlers use.
                await task_logger.log_task_failure(
                    log_entry, error_msg, str(error), {"error_type": "FetchError"}
                )
                return 0, error_msg

            all_files.extend(files or [])

            if not next_token:
                break
            page_token = next_token

        if not all_files:
            success_msg = "No Google Drive files found"
            await task_logger.log_task_success(
                log_entry, success_msg, {"files_count": 0}
            )
            return 0, success_msg

        logger.info(f"Found {len(all_files)} Google Drive files to index via Composio")

        documents_indexed = 0
        documents_skipped = 0

        for file_info in all_files:
            try:
                # Handle both standard Google API and potential Composio variations
                file_id = file_info.get("id", "") or file_info.get("fileId", "")
                file_name = (
                    file_info.get("name", "")
                    or file_info.get("fileName", "")
                    or "Untitled"
                )
                mime_type = file_info.get("mimeType", "") or file_info.get(
                    "mime_type", ""
                )

                if not file_id:
                    documents_skipped += 1
                    continue

                # Skip folders (not counted as skipped documents)
                if mime_type == "application/vnd.google-apps.folder":
                    continue

                unique_identifier_hash = generate_unique_identifier_hash(
                    DocumentType.COMPOSIO_CONNECTOR, f"drive_{file_id}", search_space_id
                )

                existing_document = await check_document_by_unique_identifier(
                    session, unique_identifier_hash
                )

                content, content_error = await composio_connector.get_drive_file_content(
                    file_id
                )

                if content_error or not content:
                    logger.warning(
                        f"Could not get content for file {file_name}: {content_error}"
                    )
                    # Use metadata as content fallback
                    markdown_content = (
                        f"# {file_name}\n\n"
                        f"**File ID:** {file_id}\n"
                        f"**Type:** {mime_type}\n"
                    )
                else:
                    try:
                        markdown_content = content.decode("utf-8")
                    except UnicodeDecodeError:
                        markdown_content = f"# {file_name}\n\n[Binary file content]\n"

                content_hash = generate_content_hash(markdown_content, search_space_id)

                # Unchanged since the last run: nothing to re-summarise.
                if existing_document and existing_document.content_hash == content_hash:
                    documents_skipped += 1
                    continue

                user_llm = await get_user_long_context_llm(
                    session, user_id, search_space_id
                )

                if user_llm:
                    summary_content, summary_embedding = await generate_document_summary(
                        markdown_content,
                        user_llm,
                        {
                            "file_id": file_id,
                            "file_name": file_name,
                            "mime_type": mime_type,
                            "document_type": "Google Drive File (Composio)",
                        },
                    )
                else:
                    # No LLM configured: embed a short metadata-only summary.
                    summary_content = (
                        f"Google Drive File: {file_name}\n\nType: {mime_type}"
                    )
                    summary_embedding = config.embedding_model_instance.embed(
                        summary_content
                    )

                chunks = await create_document_chunks(markdown_content)

                # Same metadata for new and updated documents so that an update
                # does not drop ``toolkit_id``.
                stored_metadata = {
                    "file_id": file_id,
                    "file_name": file_name,
                    "mime_type": mime_type,
                    "connector_id": connector_id,
                    "toolkit_id": "googledrive",
                    "source": "composio",
                }

                if existing_document:
                    existing_document.title = f"Drive: {file_name}"
                    existing_document.content = summary_content
                    existing_document.content_hash = content_hash
                    existing_document.embedding = summary_embedding
                    existing_document.document_metadata = stored_metadata
                    existing_document.chunks = chunks
                    existing_document.updated_at = get_current_timestamp()
                else:
                    session.add(
                        Document(
                            search_space_id=search_space_id,
                            title=f"Drive: {file_name}",
                            document_type=DocumentType.COMPOSIO_CONNECTOR,
                            document_metadata=stored_metadata,
                            content=summary_content,
                            content_hash=content_hash,
                            unique_identifier_hash=unique_identifier_hash,
                            embedding=summary_embedding,
                            chunks=chunks,
                            updated_at=get_current_timestamp(),
                        )
                    )

                documents_indexed += 1

                # Commit in batches of 10 (creates and updates alike).
                if documents_indexed % 10 == 0:
                    await session.commit()

            except Exception as e:
                # Best effort: one bad file must not abort the whole run.
                logger.error(f"Error processing Drive file: {e!s}", exc_info=True)
                documents_skipped += 1
                continue

        if documents_indexed > 0:
            await update_connector_last_indexed(session, connector, update_last_indexed)

        await session.commit()

        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Google Drive indexing via Composio for connector {connector_id}",
            {
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
            },
        )

        return documents_indexed, None

    except Exception as e:
        # Mirror the dispatcher's handlers: discard the pending transaction and
        # record the failure so the task log is not left unresolved.
        await session.rollback()
        error_msg = f"Failed to index Google Drive via Composio: {e!s}"
        await task_logger.log_task_failure(
            log_entry, error_msg, str(e), {"error_type": type(e).__name__}
        )
        logger.error(error_msg, exc_info=True)
        return 0, error_msg
|
||||
|
||||
|
||||
async def _index_composio_gmail(
    session: AsyncSession,
    connector,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str | None,
    end_date: str | None,
    task_logger: TaskLoggingService,
    log_entry,
    update_last_indexed: bool = True,
    max_items: int = 1000,
) -> tuple[int, str | None]:
    """Index Gmail messages via Composio.

    Builds a Gmail search query from the optional date bounds, fetches up to
    ``max_items`` messages through the Composio connector, and creates or
    updates one ``Document`` per message. Messages whose content hash is
    unchanged are skipped.

    Args:
        session: Async database session used for lookups, inserts and commits.
        connector: Connector object forwarded to ``update_connector_last_indexed``.
        connector_id: ID used to build the ``ComposioConnector`` and stored in
            each document's metadata.
        search_space_id: Search space the documents belong to.
        user_id: User whose long-context LLM (if any) is used for summaries.
        start_date: Optional lower bound; ``-`` is replaced by ``/`` and the
            value is sent as an ``after:`` search term.
        end_date: Optional upper bound, sent as a ``before:`` search term.
        task_logger: Service used to record progress, success and failure.
        log_entry: Task log entry passed back to ``task_logger``.
        update_last_indexed: Forwarded to ``update_connector_last_indexed``.
        max_items: Maximum number of messages requested.

    Returns:
        ``(documents_indexed, message)``. ``message`` is ``None`` after a normal
        run, an informational string when no messages were found, and an error
        description (with a count of 0) when indexing failed.
    """
    try:
        composio_connector = ComposioConnector(session, connector_id)

        await task_logger.log_task_progress(
            log_entry,
            f"Fetching Gmail messages via Composio for connector {connector_id}",
            {"stage": "fetching_messages"},
        )

        # Build query with date range
        # NOTE(review): Gmail's `before:` operator is believed to exclude the
        # given day itself - confirm whether end_date should be inclusive.
        query_parts = []
        if start_date:
            query_parts.append(f"after:{start_date.replace('-', '/')}")
        if end_date:
            query_parts.append(f"before:{end_date.replace('-', '/')}")
        query = " ".join(query_parts)

        messages, error = await composio_connector.list_gmail_messages(
            query=query,
            max_results=max_items,
        )

        if error:
            error_msg = f"Failed to fetch Gmail messages: {error}"
            # Detail string third, metadata dict fourth: the same argument
            # layout the dispatcher's exception handlers use.
            await task_logger.log_task_failure(
                log_entry, error_msg, str(error), {"error_type": "FetchError"}
            )
            return 0, error_msg

        if not messages:
            success_msg = "No Gmail messages found in the specified date range"
            await task_logger.log_task_success(
                log_entry, success_msg, {"messages_count": 0}
            )
            return 0, success_msg

        logger.info(f"Found {len(messages)} Gmail messages to index via Composio")

        documents_indexed = 0
        documents_skipped = 0

        for message in messages:
            try:
                # Composio uses 'messageId' (camelCase), not 'id'
                message_id = message.get("messageId", "") or message.get("id", "")
                if not message_id:
                    documents_skipped += 1
                    continue

                # Composio's GMAIL_FETCH_EMAILS already returns full message content
                # No need for a separate detail API call

                # Composio structure: messageId, messageText, messageTimestamp,
                # payload.headers, labelIds. ``or`` guards against keys that are
                # present but null.
                payload = message.get("payload") or {}
                headers = payload.get("headers") or []

                subject = "No Subject"
                sender = "Unknown Sender"
                date_str = message.get("messageTimestamp", "Unknown Date")

                for header in headers:
                    name = header.get("name", "").lower()
                    value = header.get("value", "")
                    if name == "subject":
                        subject = value
                    elif name == "from":
                        sender = value
                    elif name == "date":
                        date_str = value

                # Format to markdown using the full message data
                markdown_content = composio_connector.format_gmail_message_to_markdown(
                    message
                )

                unique_identifier_hash = generate_unique_identifier_hash(
                    DocumentType.COMPOSIO_CONNECTOR,
                    f"gmail_{message_id}",
                    search_space_id,
                )

                content_hash = generate_content_hash(markdown_content, search_space_id)

                existing_document = await check_document_by_unique_identifier(
                    session, unique_identifier_hash
                )

                # Unchanged since the last run: nothing to re-summarise.
                if existing_document and existing_document.content_hash == content_hash:
                    documents_skipped += 1
                    continue

                # Get label IDs from Composio response
                label_ids = message.get("labelIds", [])

                user_llm = await get_user_long_context_llm(
                    session, user_id, search_space_id
                )

                if user_llm:
                    summary_content, summary_embedding = await generate_document_summary(
                        markdown_content,
                        user_llm,
                        {
                            "message_id": message_id,
                            "subject": subject,
                            "sender": sender,
                            "document_type": "Gmail Message (Composio)",
                        },
                    )
                else:
                    # No LLM configured: embed a short metadata-only summary.
                    summary_content = (
                        f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
                    )
                    summary_embedding = config.embedding_model_instance.embed(
                        summary_content
                    )

                chunks = await create_document_chunks(markdown_content)

                # Same metadata for new and updated documents so that an update
                # does not drop ``toolkit_id``.
                stored_metadata = {
                    "message_id": message_id,
                    "subject": subject,
                    "sender": sender,
                    "date": date_str,
                    "labels": label_ids,
                    "connector_id": connector_id,
                    "toolkit_id": "gmail",
                    "source": "composio",
                }

                if existing_document:
                    existing_document.title = f"Gmail: {subject}"
                    existing_document.content = summary_content
                    existing_document.content_hash = content_hash
                    existing_document.embedding = summary_embedding
                    existing_document.document_metadata = stored_metadata
                    existing_document.chunks = chunks
                    existing_document.updated_at = get_current_timestamp()
                else:
                    session.add(
                        Document(
                            search_space_id=search_space_id,
                            title=f"Gmail: {subject}",
                            document_type=DocumentType.COMPOSIO_CONNECTOR,
                            document_metadata=stored_metadata,
                            content=summary_content,
                            content_hash=content_hash,
                            unique_identifier_hash=unique_identifier_hash,
                            embedding=summary_embedding,
                            chunks=chunks,
                            updated_at=get_current_timestamp(),
                        )
                    )

                documents_indexed += 1

                # Commit in batches of 10 (creates and updates alike).
                if documents_indexed % 10 == 0:
                    await session.commit()

            except Exception as e:
                # Best effort: one bad message must not abort the whole run.
                logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
                documents_skipped += 1
                continue

        if documents_indexed > 0:
            await update_connector_last_indexed(session, connector, update_last_indexed)

        await session.commit()

        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Gmail indexing via Composio for connector {connector_id}",
            {
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
            },
        )

        return documents_indexed, None

    except Exception as e:
        # Mirror the dispatcher's handlers: discard the pending transaction and
        # record the failure so the task log is not left unresolved.
        await session.rollback()
        error_msg = f"Failed to index Gmail via Composio: {e!s}"
        await task_logger.log_task_failure(
            log_entry, error_msg, str(e), {"error_type": type(e).__name__}
        )
        logger.error(error_msg, exc_info=True)
        return 0, error_msg
|
||||
|
||||
|
||||
async def _index_composio_google_calendar(
    session: AsyncSession,
    connector,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str | None,
    end_date: str | None,
    task_logger: TaskLoggingService,
    log_entry,
    update_last_indexed: bool = True,
    max_items: int = 2500,
) -> tuple[int, str | None]:
    """Index Google Calendar events via Composio.

    Fetches up to ``max_items`` events inside the requested time window through
    the Composio connector and creates or updates one ``Document`` per event.
    Events whose content hash is unchanged are skipped.

    Args:
        session: Async database session used for lookups, inserts and commits.
        connector: Connector object forwarded to ``update_connector_last_indexed``.
        connector_id: ID used to build the ``ComposioConnector`` and stored in
            each document's metadata.
        search_space_id: Search space the documents belong to.
        user_id: User whose long-context LLM (if any) is used for summaries.
        start_date: Optional window start; ``T00:00:00Z`` is appended to it.
            Defaults to 365 days before the current UTC date.
        end_date: Optional window end; ``T23:59:59Z`` is appended to it.
            Defaults to the current UTC date.
        task_logger: Service used to record progress, success and failure.
        log_entry: Task log entry passed back to ``task_logger``.
        update_last_indexed: Forwarded to ``update_connector_last_indexed``.
        max_items: Maximum number of events requested.

    Returns:
        ``(documents_indexed, message)``. ``message`` is ``None`` after a normal
        run, an informational string when no events were found, and an error
        description (with a count of 0) when indexing failed.
    """
    from datetime import datetime, timedelta, timezone

    try:
        composio_connector = ComposioConnector(session, connector_id)

        await task_logger.log_task_progress(
            log_entry,
            f"Fetching Google Calendar events via Composio for connector {connector_id}",
            {"stage": "fetching_events"},
        )

        # Build time range. The bounds carry a literal "Z" (UTC) suffix, so the
        # defaults must be derived from the UTC clock, not naive local time.
        now_utc = datetime.now(timezone.utc)

        if start_date:
            time_min = f"{start_date}T00:00:00Z"
        else:
            # Default to 365 days ago
            time_min = (now_utc - timedelta(days=365)).strftime("%Y-%m-%dT00:00:00Z")

        if end_date:
            time_max = f"{end_date}T23:59:59Z"
        else:
            time_max = now_utc.strftime("%Y-%m-%dT23:59:59Z")

        events, error = await composio_connector.list_calendar_events(
            time_min=time_min,
            time_max=time_max,
            max_results=max_items,
        )

        if error:
            error_msg = f"Failed to fetch Calendar events: {error}"
            # Detail string third, metadata dict fourth: the same argument
            # layout the dispatcher's exception handlers use.
            await task_logger.log_task_failure(
                log_entry, error_msg, str(error), {"error_type": "FetchError"}
            )
            return 0, error_msg

        if not events:
            success_msg = "No Google Calendar events found in the specified date range"
            await task_logger.log_task_success(
                log_entry, success_msg, {"events_count": 0}
            )
            return 0, success_msg

        logger.info(f"Found {len(events)} Google Calendar events to index via Composio")

        documents_indexed = 0
        documents_skipped = 0

        for event in events:
            try:
                # Handle both standard Google API and potential Composio variations
                event_id = event.get("id", "") or event.get("eventId", "")
                summary = (
                    event.get("summary", "") or event.get("title", "") or "No Title"
                )

                if not event_id:
                    documents_skipped += 1
                    continue

                # Format to markdown
                markdown_content = composio_connector.format_calendar_event_to_markdown(
                    event
                )

                unique_identifier_hash = generate_unique_identifier_hash(
                    DocumentType.COMPOSIO_CONNECTOR,
                    f"calendar_{event_id}",
                    search_space_id,
                )

                content_hash = generate_content_hash(markdown_content, search_space_id)

                existing_document = await check_document_by_unique_identifier(
                    session, unique_identifier_hash
                )

                # Unchanged since the last run: nothing to re-summarise.
                if existing_document and existing_document.content_hash == content_hash:
                    documents_skipped += 1
                    continue

                # Extract event times; ``or`` guards against keys that are
                # present but null.
                start = event.get("start") or {}
                end = event.get("end") or {}
                start_time = start.get("dateTime") or start.get("date", "")
                end_time = end.get("dateTime") or end.get("date", "")
                location = event.get("location", "")

                user_llm = await get_user_long_context_llm(
                    session, user_id, search_space_id
                )

                if user_llm:
                    summary_content, summary_embedding = await generate_document_summary(
                        markdown_content,
                        user_llm,
                        {
                            "event_id": event_id,
                            "summary": summary,
                            "start_time": start_time,
                            "document_type": "Google Calendar Event (Composio)",
                        },
                    )
                else:
                    # No LLM configured: embed a short metadata-only summary.
                    summary_content = (
                        f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
                    )
                    if location:
                        summary_content += f"\nLocation: {location}"
                    summary_embedding = config.embedding_model_instance.embed(
                        summary_content
                    )

                chunks = await create_document_chunks(markdown_content)

                # Same metadata for new and updated documents so that an update
                # does not drop ``toolkit_id``.
                stored_metadata = {
                    "event_id": event_id,
                    "summary": summary,
                    "start_time": start_time,
                    "end_time": end_time,
                    "location": location,
                    "connector_id": connector_id,
                    "toolkit_id": "googlecalendar",
                    "source": "composio",
                }

                if existing_document:
                    existing_document.title = f"Calendar: {summary}"
                    existing_document.content = summary_content
                    existing_document.content_hash = content_hash
                    existing_document.embedding = summary_embedding
                    existing_document.document_metadata = stored_metadata
                    existing_document.chunks = chunks
                    existing_document.updated_at = get_current_timestamp()
                else:
                    session.add(
                        Document(
                            search_space_id=search_space_id,
                            title=f"Calendar: {summary}",
                            document_type=DocumentType.COMPOSIO_CONNECTOR,
                            document_metadata=stored_metadata,
                            content=summary_content,
                            content_hash=content_hash,
                            unique_identifier_hash=unique_identifier_hash,
                            embedding=summary_embedding,
                            chunks=chunks,
                            updated_at=get_current_timestamp(),
                        )
                    )

                documents_indexed += 1

                # Commit in batches of 10 (creates and updates alike).
                if documents_indexed % 10 == 0:
                    await session.commit()

            except Exception as e:
                # Best effort: one bad event must not abort the whole run.
                logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
                documents_skipped += 1
                continue

        if documents_indexed > 0:
            await update_connector_last_indexed(session, connector, update_last_indexed)

        await session.commit()

        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Google Calendar indexing via Composio for connector {connector_id}",
            {
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
            },
        )

        return documents_indexed, None

    except Exception as e:
        # Mirror the dispatcher's handlers: discard the pending transaction and
        # record the failure so the task log is not left unresolved.
        await session.rollback()
        error_msg = f"Failed to index Google Calendar via Composio: {e!s}"
        await task_logger.log_task_failure(
            log_entry, error_msg, str(e), {"error_type": type(e).__name__}
        )
        logger.error(error_msg, exc_info=True)
        return 0, error_msg
|
||||
|
|
@ -26,6 +26,7 @@ Available indexers:
|
|||
# Calendar and scheduling
|
||||
from .airtable_indexer import index_airtable_records
|
||||
from .bookstack_indexer import index_bookstack_pages
|
||||
# Note: composio_indexer is imported directly in connector_tasks.py to avoid circular imports
|
||||
from .clickup_indexer import index_clickup_tasks
|
||||
from .confluence_indexer import index_confluence_pages
|
||||
from .discord_indexer import index_discord_messages
|
||||
|
|
@ -51,6 +52,7 @@ from .webcrawler_indexer import index_crawled_urls
|
|||
__all__ = [ # noqa: RUF022
|
||||
"index_airtable_records",
|
||||
"index_bookstack_pages",
|
||||
# "index_composio_connector", # Imported directly in connector_tasks.py to avoid circular imports
|
||||
"index_clickup_tasks",
|
||||
"index_confluence_pages",
|
||||
"index_discord_messages",
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ dependencies = [
|
|||
"starlette>=0.40.0,<0.51.0",
|
||||
"sse-starlette>=3.1.1,<3.1.2",
|
||||
"gitingest>=0.3.1",
|
||||
"composio>=0.10.9",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
|
|
|||
6080
surfsense_backend/uv.lock
generated
6080
surfsense_backend/uv.lock
generated
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue