From be5715cfebb1070458c702b4aee943a80da47b95 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Thu, 22 Jan 2026 22:33:28 +0530
Subject: [PATCH 01/51] feat: add Composio connector types and enhance
integration
- Introduced new enum values for Composio connectors: COMPOSIO_GOOGLE_DRIVE_CONNECTOR, COMPOSIO_GMAIL_CONNECTOR, and COMPOSIO_GOOGLE_CALENDAR_CONNECTOR.
- Updated database migration to add these new enum values to the relevant types.
- Refactored Composio integration logic to handle specific connector types, improving the management of connected accounts and indexing processes.
- Enhanced frontend components to support the new Composio connector types, including updated UI elements and connector configuration handling.
- Improved backend services to manage Composio connected accounts more effectively, including deletion and indexing tasks.
---
.../74_add_composio_connector_enums.py | 93 +++++++++-------
surfsense_backend/app/db.py | 8 +-
.../app/routes/composio_routes.py | 80 ++++++++++----
.../routes/search_source_connectors_routes.py | 88 ++++++++++++++-
.../app/services/composio_service.py | 34 ++++++
.../app/tasks/celery_tasks/connector_tasks.py | 10 +-
.../app/tasks/composio_indexer.py | 54 +++++----
.../assistant-ui/connector-popup.tsx | 14 ++-
.../components/composio-config.tsx | 103 ------------------
.../connector-configs/index.tsx | 4 +-
.../constants/connector-constants.ts | 26 ++++-
.../constants/connector-popup.schemas.ts | 2 +-
.../hooks/use-connector-dialog.ts | 64 ++++++-----
.../tabs/all-connectors-tab.tsx | 94 +++++++++-------
.../utils/connector-document-mapping.ts | 5 +-
surfsense_web/contracts/enums/connector.ts | 4 +-
.../contracts/enums/connectorIcons.tsx | 16 ++-
.../contracts/types/connector.types.ts | 11 +-
.../contracts/types/document.types.ts | 4 +-
19 files changed, 437 insertions(+), 277 deletions(-)
diff --git a/surfsense_backend/alembic/versions/74_add_composio_connector_enums.py b/surfsense_backend/alembic/versions/74_add_composio_connector_enums.py
index 454b60754..cadf70cb6 100644
--- a/surfsense_backend/alembic/versions/74_add_composio_connector_enums.py
+++ b/surfsense_backend/alembic/versions/74_add_composio_connector_enums.py
@@ -1,16 +1,21 @@
-"""Add COMPOSIO_CONNECTOR to SearchSourceConnectorType and DocumentType enums
+"""Add Composio connector types to SearchSourceConnectorType and DocumentType enums
Revision ID: 74
Revises: 73
Create Date: 2026-01-21
-This migration adds the COMPOSIO_CONNECTOR enum value to both:
+This migration adds the Composio connector enum values to both:
- searchsourceconnectortype (for connector type tracking)
- documenttype (for document type tracking)
Composio is a managed OAuth integration service that allows connecting
to various third-party services (Google Drive, Gmail, Calendar, etc.)
without requiring separate OAuth app verification.
+
+This migration adds three specific connector types:
+- COMPOSIO_GOOGLE_DRIVE_CONNECTOR
+- COMPOSIO_GMAIL_CONNECTOR
+- COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
"""
from collections.abc import Sequence
@@ -23,55 +28,65 @@ down_revision: str | None = "73"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
-# Define the ENUM type names and the new value
+# Define the ENUM type names and the new values
CONNECTOR_ENUM = "searchsourceconnectortype"
-CONNECTOR_NEW_VALUE = "COMPOSIO_CONNECTOR"
+CONNECTOR_NEW_VALUES = [
+ "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "COMPOSIO_GMAIL_CONNECTOR",
+ "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+]
DOCUMENT_ENUM = "documenttype"
-DOCUMENT_NEW_VALUE = "COMPOSIO_CONNECTOR"
+DOCUMENT_NEW_VALUES = [
+ "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "COMPOSIO_GMAIL_CONNECTOR",
+ "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+]
def upgrade() -> None:
- """Upgrade schema - add COMPOSIO_CONNECTOR to connector and document enums safely."""
- # Add COMPOSIO_CONNECTOR to searchsourceconnectortype only if not exists
- op.execute(
- f"""
- DO $$
- BEGIN
- IF NOT EXISTS (
- SELECT 1 FROM pg_enum
- WHERE enumlabel = '{CONNECTOR_NEW_VALUE}'
- AND enumtypid = (SELECT oid FROM pg_type WHERE typname = '{CONNECTOR_ENUM}')
- ) THEN
- ALTER TYPE {CONNECTOR_ENUM} ADD VALUE '{CONNECTOR_NEW_VALUE}';
- END IF;
- END$$;
- """
- )
+ """Upgrade schema - add Composio connector types to connector and document enums safely."""
+ # Add each Composio connector type to searchsourceconnectortype only if not exists
+ for value in CONNECTOR_NEW_VALUES:
+ op.execute(
+ f"""
+ DO $$
+ BEGIN
+ IF NOT EXISTS (
+ SELECT 1 FROM pg_enum e
+ JOIN pg_type t ON e.enumtypid = t.oid
+ WHERE t.typname = '{CONNECTOR_ENUM}' AND e.enumlabel = '{value}'
+ ) THEN
+ ALTER TYPE {CONNECTOR_ENUM} ADD VALUE '{value}';
+ END IF;
+ END$$;
+ """
+ )
- # Add COMPOSIO_CONNECTOR to documenttype only if not exists
- op.execute(
- f"""
- DO $$
- BEGIN
- IF NOT EXISTS (
- SELECT 1 FROM pg_enum
- WHERE enumlabel = '{DOCUMENT_NEW_VALUE}'
- AND enumtypid = (SELECT oid FROM pg_type WHERE typname = '{DOCUMENT_ENUM}')
- ) THEN
- ALTER TYPE {DOCUMENT_ENUM} ADD VALUE '{DOCUMENT_NEW_VALUE}';
- END IF;
- END$$;
- """
- )
+ # Add each Composio connector type to documenttype only if not exists
+ for value in DOCUMENT_NEW_VALUES:
+ op.execute(
+ f"""
+ DO $$
+ BEGIN
+ IF NOT EXISTS (
+ SELECT 1 FROM pg_enum e
+ JOIN pg_type t ON e.enumtypid = t.oid
+ WHERE t.typname = '{DOCUMENT_ENUM}' AND e.enumlabel = '{value}'
+ ) THEN
+ ALTER TYPE {DOCUMENT_ENUM} ADD VALUE '{value}';
+ END IF;
+ END$$;
+ """
+ )
def downgrade() -> None:
- """Downgrade schema - remove COMPOSIO_CONNECTOR from connector and document enums.
+ """Downgrade schema - remove Composio connector types from connector and document enums.
Note: PostgreSQL does not support removing enum values directly.
To properly downgrade, you would need to:
- 1. Delete any rows using the COMPOSIO_CONNECTOR value
- 2. Create new enums without COMPOSIO_CONNECTOR
+ 1. Delete any rows using the Composio connector type values
+ 2. Create new enums without the Composio connector types
3. Alter the columns to use the new enums
4. Drop the old enums
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index b56f37373..705e89ea7 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -54,7 +54,9 @@ class DocumentType(str, Enum):
BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
CIRCLEBACK = "CIRCLEBACK"
NOTE = "NOTE"
- COMPOSIO_CONNECTOR = "COMPOSIO_CONNECTOR" # Generic Composio integration
+ COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
+ COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
+ COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
class SearchSourceConnectorType(str, Enum):
@@ -82,7 +84,9 @@ class SearchSourceConnectorType(str, Enum):
BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR"
MCP_CONNECTOR = "MCP_CONNECTOR" # Model Context Protocol - User-defined API tools
- COMPOSIO_CONNECTOR = "COMPOSIO_CONNECTOR" # Generic Composio integration (Google, Slack, etc.)
+ COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
+ COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
+ COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
class LiteLLMProvider(str, Enum):
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index b6f418aa2..77891fc88 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -19,6 +19,7 @@ from fastapi.responses import RedirectResponse
from pydantic import ValidationError
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
from app.config import config
from app.db import (
@@ -30,15 +31,17 @@ from app.db import (
from app.services.composio_service import (
COMPOSIO_TOOLKIT_NAMES,
INDEXABLE_TOOLKITS,
+ TOOLKIT_TO_CONNECTOR_TYPE,
ComposioService,
)
from app.users import current_active_user
-from app.utils.connector_naming import (
- check_duplicate_connector,
- generate_unique_connector_name,
-)
+from app.utils.connector_naming import generate_unique_connector_name
from app.utils.oauth_security import OAuthStateManager
+# Note: We no longer use check_duplicate_connector for Composio connectors because
+# Composio generates a new connected_account_id each time, even for the same Google account.
+# Instead, we check for existing connectors by type/space/user and update them.
+
logger = logging.getLogger(__name__)
router = APIRouter()
@@ -260,30 +263,65 @@ async def composio_callback(
"is_indexable": toolkit_id in INDEXABLE_TOOLKITS,
}
- # Check for duplicate connector
- # For Composio, we use toolkit_id + connected_account_id as unique identifier
- identifier = final_connected_account_id or f"{toolkit_id}_{user_id}"
-
- is_duplicate = await check_duplicate_connector(
- session,
- SearchSourceConnectorType.COMPOSIO_CONNECTOR,
- space_id,
- user_id,
- identifier,
- )
- if is_duplicate:
- logger.warning(
- f"Duplicate Composio connector detected for user {user_id} with toolkit {toolkit_id}"
+ # Get the specific connector type for this toolkit
+ connector_type_str = TOOLKIT_TO_CONNECTOR_TYPE.get(toolkit_id)
+ if not connector_type_str:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Unknown toolkit: {toolkit_id}. Available: {list(TOOLKIT_TO_CONNECTOR_TYPE.keys())}",
)
+ connector_type = SearchSourceConnectorType(connector_type_str)
+
+ # Check for existing connector of the same type for this user/space
+ # When reconnecting, Composio gives a new connected_account_id, so we need to
+ # check by connector_type, user_id, and search_space_id instead of connected_account_id
+ existing_connector_result = await session.execute(
+ select(SearchSourceConnector).where(
+ SearchSourceConnector.connector_type == connector_type,
+ SearchSourceConnector.search_space_id == space_id,
+ SearchSourceConnector.user_id == user_id,
+ )
+ )
+ existing_connector = existing_connector_result.scalars().first()
+
+ if existing_connector:
+ # Delete the old Composio connected account before updating
+ old_connected_account_id = existing_connector.config.get("composio_connected_account_id")
+ if old_connected_account_id and old_connected_account_id != final_connected_account_id:
+ try:
+ deleted = await service.delete_connected_account(old_connected_account_id)
+ if deleted:
+ logger.info(
+ f"Deleted old Composio connected account {old_connected_account_id} "
+ f"before updating connector {existing_connector.id}"
+ )
+ else:
+ logger.warning(
+ f"Failed to delete old Composio connected account {old_connected_account_id}"
+ )
+ except Exception as delete_error:
+ # Log but don't fail - the old account may already be deleted
+ logger.warning(
+ f"Error deleting old Composio connected account {old_connected_account_id}: {delete_error!s}"
+ )
+
+ # Update existing connector with new connected_account_id
+ logger.info(
+ f"Updating existing Composio connector {existing_connector.id} with new connected_account_id {final_connected_account_id}"
+ )
+ existing_connector.config = connector_config
+ await session.commit()
+ await session.refresh(existing_connector)
+
return RedirectResponse(
- url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&error=duplicate_account&connector=composio-connector"
+ url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&success=true&connector=composio-connector&connectorId={existing_connector.id}"
)
try:
# Generate a unique, user-friendly connector name
connector_name = await generate_unique_connector_name(
session,
- SearchSourceConnectorType.COMPOSIO_CONNECTOR,
+ connector_type,
space_id,
user_id,
f"{toolkit_name} (Composio)",
@@ -291,7 +329,7 @@ async def composio_callback(
db_connector = SearchSourceConnector(
name=connector_name,
- connector_type=SearchSourceConnectorType.COMPOSIO_CONNECTOR,
+ connector_type=connector_type,
config=connector_config,
search_space_id=space_id,
user_id=user_id,
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index d60d08d57..9ad03fba8 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -37,6 +37,7 @@ from app.db import (
async_session_maker,
get_async_session,
)
+from app.services.composio_service import ComposioService
from app.schemas import (
GoogleDriveIndexRequest,
MCPConnectorCreate,
@@ -529,6 +530,34 @@ async def delete_search_source_connector(
f"Failed to delete periodic schedule for connector {connector_id}"
)
+ # For Composio connectors, also delete the connected account in Composio
+ composio_connector_types = [
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
+ ]
+ if db_connector.connector_type in composio_connector_types:
+ composio_connected_account_id = db_connector.config.get("composio_connected_account_id")
+ if composio_connected_account_id and ComposioService.is_enabled():
+ try:
+ service = ComposioService()
+ deleted = await service.delete_connected_account(composio_connected_account_id)
+ if deleted:
+ logger.info(
+ f"Successfully deleted Composio connected account {composio_connected_account_id} "
+ f"for connector {connector_id}"
+ )
+ else:
+ logger.warning(
+ f"Failed to delete Composio connected account {composio_connected_account_id} "
+ f"for connector {connector_id}"
+ )
+ except Exception as composio_error:
+ # Log but don't fail the deletion - Composio account may already be deleted
+ logger.warning(
+ f"Error deleting Composio connected account {composio_connected_account_id}: {composio_error!s}"
+ )
+
await session.delete(db_connector)
await session.commit()
return {"message": "Search source connector deleted successfully"}
@@ -868,7 +897,11 @@ async def index_connector_content(
)
response_message = "Web page indexing started in the background."
- elif connector.connector_type == SearchSourceConnectorType.COMPOSIO_CONNECTOR:
+ elif connector.connector_type in [
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
+ ]:
from app.tasks.celery_tasks.connector_tasks import (
index_composio_connector_task,
)
@@ -2086,6 +2119,59 @@ async def run_bookstack_indexing(
)
+async def run_composio_indexing_with_new_session(
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ start_date: str,
+ end_date: str,
+):
+ """
+ Create a new session and run the Composio indexing task.
+ This prevents session leaks by creating a dedicated session for the background task.
+ """
+ async with async_session_maker() as session:
+ await run_composio_indexing(
+ session, connector_id, search_space_id, user_id, start_date, end_date
+ )
+
+
+async def run_composio_indexing(
+ session: AsyncSession,
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ start_date: str,
+ end_date: str,
+):
+ """
+ Run Composio connector indexing with real-time notifications.
+
+ This wraps the Composio indexer with the notification system so that
+ Electric SQL can sync indexing progress to the frontend in real-time.
+
+ Args:
+ session: Database session
+ connector_id: ID of the Composio connector
+ search_space_id: ID of the search space
+ user_id: ID of the user
+ start_date: Start date for indexing
+ end_date: End date for indexing
+ """
+ from app.tasks.composio_indexer import index_composio_connector
+
+ await _run_indexing_with_notifications(
+ session=session,
+ connector_id=connector_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ start_date=start_date,
+ end_date=end_date,
+ indexing_function=index_composio_connector,
+ update_timestamp_func=_update_connector_timestamp_by_id,
+ )
+
+
# =============================================================================
# MCP Connector Routes
# =============================================================================
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index 4b6a32b03..17fbd64e0 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -39,6 +39,20 @@ COMPOSIO_TOOLKIT_NAMES = {
# Toolkits that support indexing (Phase 1: Google services only)
INDEXABLE_TOOLKITS = {"googledrive", "gmail", "googlecalendar"}
+# Mapping of toolkit IDs to connector types
+TOOLKIT_TO_CONNECTOR_TYPE = {
+ "googledrive": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "gmail": "COMPOSIO_GMAIL_CONNECTOR",
+ "googlecalendar": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+}
+
+# Mapping of toolkit IDs to document types
+TOOLKIT_TO_DOCUMENT_TYPE = {
+ "googledrive": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "gmail": "COMPOSIO_GMAIL_CONNECTOR",
+ "googlecalendar": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+}
+
class ComposioService:
"""Service for interacting with Composio API."""
@@ -298,6 +312,26 @@ class ComposioService:
logger.error(f"Failed to list connections for user {user_id}: {e!s}")
return []
+ async def delete_connected_account(self, connected_account_id: str) -> bool:
+ """
+ Delete a connected account from Composio.
+
+ This permanently removes the connected account and revokes access tokens.
+
+ Args:
+ connected_account_id: The Composio connected account ID to delete.
+
+ Returns:
+ True if deletion was successful, False otherwise.
+ """
+ try:
+ self.client.connected_accounts.delete(connected_account_id)
+ logger.info(f"Successfully deleted Composio connected account: {connected_account_id}")
+ return True
+ except Exception as e:
+ logger.error(f"Failed to delete Composio connected account {connected_account_id}: {e!s}")
+ return False
+
async def execute_tool(
self,
connected_account_id: str,
diff --git a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
index 72cedb40f..307b5a551 100644
--- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
@@ -793,11 +793,13 @@ async def _index_composio_connector(
start_date: str,
end_date: str,
):
- """Index Composio connector content with new session."""
- # Import from tasks folder (not connector_indexers) to avoid circular import
- from app.tasks.composio_indexer import index_composio_connector
+ """Index Composio connector content with new session and real-time notifications."""
+ # Import from routes to use the notification-wrapped version
+ from app.routes.search_source_connectors_routes import (
+ run_composio_indexing,
+ )
async with get_celery_session_maker()() as session:
- await index_composio_connector(
+ await run_composio_indexing(
session, connector_id, search_space_id, user_id, start_date, end_date
)
diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py
index 01d2cfce4..8762561ee 100644
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@@ -23,7 +23,7 @@ from app.db import (
SearchSourceConnector,
SearchSourceConnectorType,
)
-from app.services.composio_service import INDEXABLE_TOOLKITS
+from app.services.composio_service import INDEXABLE_TOOLKITS, TOOLKIT_TO_DOCUMENT_TYPE
from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import (
@@ -58,15 +58,13 @@ async def check_document_by_unique_identifier(
async def get_connector_by_id(
- session: AsyncSession, connector_id: int, connector_type: SearchSourceConnectorType
+ session: AsyncSession, connector_id: int, connector_type: SearchSourceConnectorType | None
) -> SearchSourceConnector | None:
- """Get a connector by ID and type from the database."""
- result = await session.execute(
- select(SearchSourceConnector).filter(
- SearchSourceConnector.id == connector_id,
- SearchSourceConnector.connector_type == connector_type,
- )
- )
+ """Get a connector by ID and optionally by type from the database."""
+ query = select(SearchSourceConnector).filter(SearchSourceConnector.id == connector_id)
+ if connector_type is not None:
+ query = query.filter(SearchSourceConnector.connector_type == connector_type)
+ result = await session.execute(query)
return result.scalars().first()
@@ -129,10 +127,23 @@ async def index_composio_connector(
)
try:
- # Get connector by id
+ # Get connector by id - accept any Composio connector type
+ # We'll check the actual type after loading
connector = await get_connector_by_id(
- session, connector_id, SearchSourceConnectorType.COMPOSIO_CONNECTOR
+ session, connector_id, None # Don't filter by type, we'll validate after
)
+
+ # Validate it's a Composio connector
+ if connector and connector.connector_type not in [
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
+ ]:
+ error_msg = f"Connector {connector_id} is not a Composio connector"
+ await task_logger.log_task_failure(
+ log_entry, error_msg, {"error_type": "InvalidConnectorType"}
+ )
+ return 0, error_msg
if not connector:
error_msg = f"Composio connector with ID {connector_id} not found"
@@ -276,7 +287,7 @@ async def _index_composio_google_drive(
await task_logger.log_task_success(
log_entry, success_msg, {"files_count": 0}
)
- return 0, success_msg
+ return 0, None # Return None (not error) when no items found - this is success with 0 items
logger.info(f"Found {len(all_files)} Google Drive files to index via Composio")
@@ -299,8 +310,9 @@ async def _index_composio_google_drive(
continue
# Generate unique identifier hash
+ document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
unique_identifier_hash = generate_unique_identifier_hash(
- DocumentType.COMPOSIO_CONNECTOR, f"drive_{file_id}", search_space_id
+ document_type, f"drive_{file_id}", search_space_id
)
# Check if document exists
@@ -394,7 +406,7 @@ async def _index_composio_google_drive(
document = Document(
search_space_id=search_space_id,
title=f"Drive: {file_name}",
- document_type=DocumentType.COMPOSIO_CONNECTOR,
+ document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
document_metadata={
"file_id": file_id,
"file_name": file_name,
@@ -489,7 +501,7 @@ async def _index_composio_gmail(
await task_logger.log_task_success(
log_entry, success_msg, {"messages_count": 0}
)
- return 0, success_msg
+ return 0, None # Return None (not error) when no items found - this is success with 0 items
logger.info(f"Found {len(messages)} Gmail messages to index via Composio")
@@ -530,8 +542,9 @@ async def _index_composio_gmail(
markdown_content = composio_connector.format_gmail_message_to_markdown(message)
# Generate unique identifier
+ document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"])
unique_identifier_hash = generate_unique_identifier_hash(
- DocumentType.COMPOSIO_CONNECTOR, f"gmail_{message_id}", search_space_id
+ document_type, f"gmail_{message_id}", search_space_id
)
content_hash = generate_content_hash(markdown_content, search_space_id)
@@ -612,7 +625,7 @@ async def _index_composio_gmail(
document = Document(
search_space_id=search_space_id,
title=f"Gmail: {subject}",
- document_type=DocumentType.COMPOSIO_CONNECTOR,
+ document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
document_metadata={
"message_id": message_id,
"subject": subject,
@@ -717,7 +730,7 @@ async def _index_composio_google_calendar(
await task_logger.log_task_success(
log_entry, success_msg, {"events_count": 0}
)
- return 0, success_msg
+ return 0, None # Return None (not error) when no items found - this is success with 0 items
logger.info(f"Found {len(events)} Google Calendar events to index via Composio")
@@ -738,8 +751,9 @@ async def _index_composio_google_calendar(
markdown_content = composio_connector.format_calendar_event_to_markdown(event)
# Generate unique identifier
+ document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"])
unique_identifier_hash = generate_unique_identifier_hash(
- DocumentType.COMPOSIO_CONNECTOR, f"calendar_{event_id}", search_space_id
+ document_type, f"calendar_{event_id}", search_space_id
)
content_hash = generate_content_hash(markdown_content, search_space_id)
@@ -828,7 +842,7 @@ async def _index_composio_google_calendar(
document = Document(
search_space_id=search_space_id,
title=f"Calendar: {summary}",
- document_type=DocumentType.COMPOSIO_CONNECTOR,
+ document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"]),
document_metadata={
"event_id": event_id,
"summary": summary,
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index 1f4341d07..228b12836 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -188,8 +188,18 @@ export const ConnectorIndicator: FC = () => {
searchSpaceId={searchSpaceId}
connectedToolkits={
(connectors || [])
- .filter((c: SearchSourceConnector) => c.connector_type === "COMPOSIO_CONNECTOR")
- .map((c: SearchSourceConnector) => c.config?.toolkit_id as string)
+ .filter((c: SearchSourceConnector) =>
+ c.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
+ c.connector_type === "COMPOSIO_GMAIL_CONNECTOR" ||
+ c.connector_type === "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
+ )
+ .map((c: SearchSourceConnector) => {
+ // Map connector type back to toolkit_id
+ if (c.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR") return "googledrive";
+ if (c.connector_type === "COMPOSIO_GMAIL_CONNECTOR") return "gmail";
+ if (c.connector_type === "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR") return "googlecalendar";
+ return c.config?.toolkit_id as string;
+ })
.filter(Boolean)
}
onBack={handleBackFromComposio}
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
index 6fe37e1e5..a96f906fe 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
@@ -1,7 +1,5 @@
"use client";
-import { ExternalLink, Info, Zap } from "lucide-react";
-import Image from "next/image";
import type { FC } from "react";
import { Badge } from "@/components/ui/badge";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
@@ -13,92 +11,13 @@ interface ComposioConfigProps {
onNameChange?: (name: string) => void;
}
-// Get toolkit display info
-const getToolkitInfo = (toolkitId: string): { name: string; icon: string; description: string } => {
- switch (toolkitId) {
- case "googledrive":
- return {
- name: "Google Drive",
- icon: "/connectors/google-drive.svg",
- description: "Files and documents from Google Drive",
- };
- case "gmail":
- return {
- name: "Gmail",
- icon: "/connectors/google-gmail.svg",
- description: "Emails from Gmail",
- };
- case "googlecalendar":
- return {
- name: "Google Calendar",
- icon: "/connectors/google-calendar.svg",
- description: "Events from Google Calendar",
- };
- case "slack":
- return {
- name: "Slack",
- icon: "/connectors/slack.svg",
- description: "Messages from Slack",
- };
- case "notion":
- return {
- name: "Notion",
- icon: "/connectors/notion.svg",
- description: "Pages from Notion",
- };
- case "github":
- return {
- name: "GitHub",
- icon: "/connectors/github.svg",
- description: "Repositories from GitHub",
- };
- default:
- return {
- name: toolkitId,
- icon: "/connectors/composio.svg",
- description: "Connected via Composio",
- };
- }
-};
-
export const ComposioConfig: FC = ({ connector }) => {
const toolkitId = connector.config?.toolkit_id as string;
- const toolkitName = connector.config?.toolkit_name as string;
const isIndexable = connector.config?.is_indexable as boolean;
const composioAccountId = connector.config?.composio_connected_account_id as string;
- const toolkitInfo = getToolkitInfo(toolkitId);
-
return (
- {/* Toolkit Info Card */}
-
-
-
-
-
-
-
-
{toolkitName || toolkitInfo.name}
-
-
- Composio
-
-
-
{toolkitInfo.description}
-
-
-
-
{/* Connection Details */}
@@ -133,28 +52,6 @@ export const ComposioConfig: FC = ({ connector }) => {
)}
-
- {/* Info Banner */}
-
-
-
-
-
- This connection uses Composio's managed OAuth, which means you don't need to
- wait for app verification. Your data is securely accessed through Composio.
-
-
- Learn more about Composio
-
-
-
-
-
);
};
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx
index a7a92597c..160185b1e 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx
@@ -74,7 +74,9 @@ export function getConnectorConfigComponent(
return CirclebackConfig;
case "MCP_CONNECTOR":
return MCPConfig;
- case "COMPOSIO_CONNECTOR":
+ case "COMPOSIO_GOOGLE_DRIVE_CONNECTOR":
+ case "COMPOSIO_GMAIL_CONNECTOR":
+ case "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR":
return ComposioConfig;
// OAuth connectors (Gmail, Calendar, Airtable, Notion) and others don't need special config UI
default:
diff --git a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts
index 7646d7a9b..11066f28a 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts
@@ -168,14 +168,28 @@ export const OTHER_CONNECTORS = [
},
] as const;
-// Composio Connector (Single entry that opens toolkit selector)
+// Composio Connectors - Individual entries for each supported toolkit
export const COMPOSIO_CONNECTORS = [
{
- id: "composio-connector",
- title: "Composio",
- description: "Connect 100+ apps via Composio (Google, Slack, Notion, etc.)",
- connectorType: EnumConnectorName.COMPOSIO_CONNECTOR,
- // No authEndpoint - handled via toolkit selector view
+ id: "composio-googledrive",
+ title: "Google Drive",
+ description: "Search your Drive files via Composio",
+ connectorType: EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+ authEndpoint: "/api/v1/auth/composio/connector/add/?toolkit_id=googledrive",
+ },
+ {
+ id: "composio-gmail",
+ title: "Gmail",
+ description: "Search through your emails via Composio",
+ connectorType: EnumConnectorName.COMPOSIO_GMAIL_CONNECTOR,
+ authEndpoint: "/api/v1/auth/composio/connector/add/?toolkit_id=gmail",
+ },
+ {
+ id: "composio-googlecalendar",
+ title: "Google Calendar",
+ description: "Search through your events via Composio",
+ connectorType: EnumConnectorName.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
+ authEndpoint: "/api/v1/auth/composio/connector/add/?toolkit_id=googlecalendar",
},
] as const;
diff --git a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-popup.schemas.ts b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-popup.schemas.ts
index d74d66203..c7e77f666 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-popup.schemas.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-popup.schemas.ts
@@ -7,7 +7,7 @@ import { searchSourceConnectorTypeEnum } from "@/contracts/types/connector.types
export const connectorPopupQueryParamsSchema = z.object({
modal: z.enum(["connectors"]).optional(),
tab: z.enum(["all", "active"]).optional(),
- view: z.enum(["configure", "edit", "connect", "youtube", "accounts", "mcp-list"]).optional(),
+ view: z.enum(["configure", "edit", "connect", "youtube", "accounts", "mcp-list", "composio"]).optional(),
connector: z.string().optional(),
connectorId: z.string().optional(),
connectorType: z.string().optional(),
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index c6ef1a927..4a177ac36 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -26,7 +26,7 @@ import {
import { cacheKeys } from "@/lib/query-client/cache-keys";
import { queryClient } from "@/lib/query-client/client";
import type { IndexingConfigState } from "../constants/connector-constants";
-import { OAUTH_CONNECTORS, OTHER_CONNECTORS } from "../constants/connector-constants";
+import { COMPOSIO_CONNECTORS, OAUTH_CONNECTORS, OTHER_CONNECTORS } from "../constants/connector-constants";
import {
dateRangeSchema,
frequencyMinutesSchema,
@@ -176,15 +176,24 @@ export const useConnectorDialog = () => {
}
// Handle accounts view
- if (params.view === "accounts" && params.connectorType && !viewingAccountsType) {
- const oauthConnector = OAUTH_CONNECTORS.find(
- (c) => c.connectorType === params.connectorType
- );
- if (oauthConnector) {
- setViewingAccountsType({
- connectorType: oauthConnector.connectorType,
- connectorTitle: oauthConnector.title,
- });
+ if (params.view === "accounts" && params.connectorType) {
+ // Update state if not set, or if connectorType has changed
+ const needsUpdate = !viewingAccountsType ||
+ viewingAccountsType.connectorType !== params.connectorType;
+
+ if (needsUpdate) {
+ // Check both OAUTH_CONNECTORS and COMPOSIO_CONNECTORS
+ const oauthConnector = OAUTH_CONNECTORS.find(
+ (c) => c.connectorType === params.connectorType
+ ) || COMPOSIO_CONNECTORS.find(
+ (c) => c.connectorType === params.connectorType
+ );
+ if (oauthConnector) {
+ setViewingAccountsType({
+ connectorType: oauthConnector.connectorType,
+ connectorTitle: oauthConnector.title,
+ });
+ }
}
}
@@ -293,6 +302,8 @@ export const useConnectorDialog = () => {
indexingConfig,
connectingConnectorType,
viewingAccountsType,
+ viewingMCPList,
+ viewingComposio,
]);
// Detect OAuth success / Failure and transition to config view
@@ -389,15 +400,19 @@ export const useConnectorDialog = () => {
// Handle OAuth connection
const handleConnectOAuth = useCallback(
- async (connector: (typeof OAUTH_CONNECTORS)[number]) => {
+ async (connector: (typeof OAUTH_CONNECTORS)[number] | (typeof COMPOSIO_CONNECTORS)[number]) => {
if (!searchSpaceId || !connector.authEndpoint) return;
// Set connecting state immediately to disable button and show spinner
setConnectingId(connector.id);
try {
+ // Check if authEndpoint already has query parameters
+ const separator = connector.authEndpoint.includes("?") ? "&" : "?";
+ const url = `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}${connector.authEndpoint}${separator}space_id=${searchSpaceId}`;
+
const response = await authenticatedFetch(
- `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}${connector.authEndpoint}?space_id=${searchSpaceId}`,
+ url,
{ method: "GET" }
);
@@ -799,23 +814,19 @@ export const useConnectorDialog = () => {
// Handle viewing accounts list for OAuth connector type
const handleViewAccountsList = useCallback(
- (connectorType: string, connectorTitle: string) => {
+ (connectorType: string, _connectorTitle?: string) => {
if (!searchSpaceId) return;
- setViewingAccountsType({
- connectorType,
- connectorTitle,
- });
-
// Update URL to show accounts view, preserving current tab
+ // The useEffect will handle setting viewingAccountsType based on URL params
const url = new URL(window.location.href);
url.searchParams.set("modal", "connectors");
url.searchParams.set("view", "accounts");
url.searchParams.set("connectorType", connectorType);
// Keep the current tab in URL so we can go back to it
- window.history.pushState({ modal: true }, "", url.toString());
+ router.replace(url.pathname + url.search, { scroll: false });
},
- [searchSpaceId]
+ [searchSpaceId, router]
);
// Handle going back from accounts list view
@@ -839,8 +850,8 @@ export const useConnectorDialog = () => {
const url = new URL(window.location.href);
url.searchParams.set("modal", "connectors");
url.searchParams.set("view", "mcp-list");
- window.history.pushState({ modal: true }, "", url.toString());
- }, [searchSpaceId]);
+ router.replace(url.pathname + url.search, { scroll: false });
+ }, [searchSpaceId, router]);
// Handle going back from MCP list view
const handleBackFromMCPList = useCallback(() => {
@@ -871,8 +882,8 @@ export const useConnectorDialog = () => {
const url = new URL(window.location.href);
url.searchParams.set("modal", "connectors");
url.searchParams.set("view", "composio");
- window.history.pushState({ modal: true }, "", url.toString());
- }, [searchSpaceId]);
+ router.replace(url.pathname + url.search, { scroll: false });
+ }, [searchSpaceId, router]);
// Handle going back from Composio view
const handleBackFromComposio = useCallback(() => {
@@ -1423,7 +1434,7 @@ export const useConnectorDialog = () => {
setIsDisconnecting(false);
}
},
- [editingConnector, searchSpaceId, deleteConnector, router]
+ [editingConnector, searchSpaceId, deleteConnector, router, cameFromMCPList]
);
// Handle quick index (index without date picker, uses backend defaults)
@@ -1579,6 +1590,7 @@ export const useConnectorDialog = () => {
viewingAccountsType,
viewingMCPList,
viewingComposio,
+ connectingComposioToolkit,
// Setters
setSearchQuery,
@@ -1616,8 +1628,6 @@ export const useConnectorDialog = () => {
setIndexingConnectorConfig,
// Composio
- viewingComposio,
- connectingComposioToolkit,
handleOpenComposio,
handleBackFromComposio,
handleConnectComposioToolkit,
diff --git a/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx b/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
index 1b36b3b81..4a0680200 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
@@ -4,7 +4,6 @@ import type { FC } from "react";
import { EnumConnectorName } from "@/contracts/enums/connector";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
import { ConnectorCard } from "../components/connector-card";
-import { ComposioConnectorCard } from "../components/composio-connector-card";
import { CRAWLERS, OAUTH_CONNECTORS, OTHER_CONNECTORS, COMPOSIO_CONNECTORS } from "../constants/connector-constants";
import { getDocumentCountForConnector } from "../utils/connector-document-mapping";
@@ -29,13 +28,12 @@ interface AllConnectorsTabProps {
allConnectors: SearchSourceConnector[] | undefined;
documentTypeCounts?: Record;
indexingConnectorIds?: Set;
- onConnectOAuth: (connector: (typeof OAUTH_CONNECTORS)[number]) => void;
+ onConnectOAuth: (connector: (typeof OAUTH_CONNECTORS)[number] | (typeof COMPOSIO_CONNECTORS)[number]) => void;
onConnectNonOAuth?: (connectorType: string) => void;
onCreateWebcrawler?: () => void;
onCreateYouTubeCrawler?: () => void;
onManage?: (connector: SearchSourceConnector) => void;
onViewAccountsList?: (connectorType: string, connectorTitle: string) => void;
- onOpenComposio?: () => void;
}
export const AllConnectorsTab: FC = ({
@@ -51,7 +49,6 @@ export const AllConnectorsTab: FC = ({
onCreateYouTubeCrawler,
onManage,
onViewAccountsList,
- onOpenComposio,
}) => {
// Filter connectors based on search
const filteredOAuth = OAUTH_CONNECTORS.filter(
@@ -79,23 +76,16 @@ export const AllConnectorsTab: FC = ({
c.description.toLowerCase().includes(searchQuery.toLowerCase())
);
- // Count Composio connectors
- const composioConnectorCount = allConnectors
- ? allConnectors.filter(
- (c: SearchSourceConnector) => c.connector_type === EnumConnectorName.COMPOSIO_CONNECTOR
- ).length
- : 0;
-
return (
- {/* Quick Connect */}
- {filteredOAuth.length > 0 && (
+ {/* Managed OAuth (Composio Integrations) */}
+ {filteredComposio.length > 0 && (
-
Quick Connect
+ Managed OAuth
- {filteredOAuth.map((connector) => {
+ {filteredComposio.map((connector) => {
const isConnected = connectedTypes.has(connector.connectorType);
const isConnecting = connectingId === connector.id;
@@ -109,17 +99,6 @@ export const AllConnectorsTab: FC
= ({
const accountCount = typeConnectors.length;
- // Get the most recent last_indexed_at across all accounts
- const mostRecentLastIndexed = typeConnectors.reduce(
- (latest, c) => {
- if (!c.last_indexed_at) return latest;
- if (!latest) return c.last_indexed_at;
- return new Date(c.last_indexed_at) > new Date(latest)
- ? c.last_indexed_at
- : latest;
- },
- undefined
- );
const documentCount = getDocumentCountForConnector(
connector.connectorType,
@@ -154,26 +133,57 @@ export const AllConnectorsTab: FC = ({
)}
- {/* Composio Integrations */}
- {filteredComposio.length > 0 && onOpenComposio && (
+ {/* Quick Connect */}
+ {filteredOAuth.length > 0 && (
-
Managed OAuth
-
- No verification needed
-
+ Quick Connect
- {filteredComposio.map((connector) => (
-
- ))}
+ {filteredOAuth.map((connector) => {
+ const isConnected = connectedTypes.has(connector.connectorType);
+ const isConnecting = connectingId === connector.id;
+
+ // Find all connectors of this type
+ const typeConnectors =
+ isConnected && allConnectors
+ ? allConnectors.filter(
+ (c: SearchSourceConnector) => c.connector_type === connector.connectorType
+ )
+ : [];
+
+ const accountCount = typeConnectors.length;
+
+
+ const documentCount = getDocumentCountForConnector(
+ connector.connectorType,
+ documentTypeCounts
+ );
+
+ // Check if any account is currently indexing
+ const isIndexing = typeConnectors.some((c) => indexingConnectorIds?.has(c.id));
+
+ return (
+ onConnectOAuth(connector)}
+ onManage={
+ isConnected && onViewAccountsList
+ ? () => onViewAccountsList(connector.connectorType, connector.title)
+ : undefined
+ }
+ />
+ );
+ })}
)}
diff --git a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts
index 522e1763c..ded3bdcca 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts
@@ -30,7 +30,10 @@ export const CONNECTOR_TO_DOCUMENT_TYPE: Record = {
// Special mappings (connector type differs from document type)
GOOGLE_DRIVE_CONNECTOR: "GOOGLE_DRIVE_FILE",
WEBCRAWLER_CONNECTOR: "CRAWLED_URL",
- COMPOSIO_CONNECTOR: "COMPOSIO_CONNECTOR",
+ // Composio connectors map to their own document types
+ COMPOSIO_GOOGLE_DRIVE_CONNECTOR: "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ COMPOSIO_GMAIL_CONNECTOR: "COMPOSIO_GMAIL_CONNECTOR",
+ COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
};
/**
diff --git a/surfsense_web/contracts/enums/connector.ts b/surfsense_web/contracts/enums/connector.ts
index e1fb1e3f2..20d6093b6 100644
--- a/surfsense_web/contracts/enums/connector.ts
+++ b/surfsense_web/contracts/enums/connector.ts
@@ -24,5 +24,7 @@ export enum EnumConnectorName {
YOUTUBE_CONNECTOR = "YOUTUBE_CONNECTOR",
CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR",
MCP_CONNECTOR = "MCP_CONNECTOR",
- COMPOSIO_CONNECTOR = "COMPOSIO_CONNECTOR",
+ COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR",
+ COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
}
diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx
index 947c886d5..a1e6c9040 100644
--- a/surfsense_web/contracts/enums/connectorIcons.tsx
+++ b/surfsense_web/contracts/enums/connectorIcons.tsx
@@ -66,8 +66,12 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
return ;
case EnumConnectorName.MCP_CONNECTOR:
return ;
- case EnumConnectorName.COMPOSIO_CONNECTOR:
- return ;
+ case EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR:
+ return ;
+ case EnumConnectorName.COMPOSIO_GMAIL_CONNECTOR:
+ return ;
+ case EnumConnectorName.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR:
+ return ;
// Additional cases for non-enum connector types
case "YOUTUBE_CONNECTOR":
return ;
@@ -87,8 +91,12 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
return ;
case "GOOGLE_DRIVE_FILE":
return ;
- case "COMPOSIO_CONNECTOR":
- return ;
+ case "COMPOSIO_GOOGLE_DRIVE_CONNECTOR":
+ return ;
+ case "COMPOSIO_GMAIL_CONNECTOR":
+ return ;
+ case "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR":
+ return ;
case "NOTE":
return ;
case "EXTENSION":
diff --git a/surfsense_web/contracts/types/connector.types.ts b/surfsense_web/contracts/types/connector.types.ts
index 861bf1758..d52469ce9 100644
--- a/surfsense_web/contracts/types/connector.types.ts
+++ b/surfsense_web/contracts/types/connector.types.ts
@@ -27,7 +27,9 @@ export const searchSourceConnectorTypeEnum = z.enum([
"BOOKSTACK_CONNECTOR",
"CIRCLEBACK_CONNECTOR",
"MCP_CONNECTOR",
- "COMPOSIO_CONNECTOR",
+ "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "COMPOSIO_GMAIL_CONNECTOR",
+ "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
]);
export const searchSourceConnector = z.object({
@@ -149,6 +151,13 @@ export const googleDriveIndexBody = z.object({
name: z.string(),
})
),
+ indexing_options: z
+ .object({
+ max_files_per_folder: z.number().int().min(1).max(1000),
+ incremental_sync: z.boolean(),
+ include_subfolders: z.boolean(),
+ })
+ .optional(),
});
/**
diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts
index a8f3a3b38..01a58173e 100644
--- a/surfsense_web/contracts/types/document.types.ts
+++ b/surfsense_web/contracts/types/document.types.ts
@@ -25,7 +25,9 @@ export const documentTypeEnum = z.enum([
"CIRCLEBACK",
"SURFSENSE_DOCS",
"NOTE",
- "COMPOSIO_CONNECTOR",
+ "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "COMPOSIO_GMAIL_CONNECTOR",
+ "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
]);
export const document = z.object({
From 6139b07a66f859adc80d93310e420209eeb3f2e0 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 03:57:02 +0530
Subject: [PATCH 02/51] fix: remove toolkit view
---
.../assistant-ui/connector-popup.tsx | 34 +-
.../hooks/use-connector-dialog.ts | 79 -----
.../tabs/all-connectors-tab.tsx | 2 +-
.../views/composio-toolkit-view.tsx | 301 ------------------
4 files changed, 2 insertions(+), 414 deletions(-)
delete mode 100644 surfsense_web/components/assistant-ui/connector-popup/views/composio-toolkit-view.tsx
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index 228b12836..a1108f7c8 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -21,7 +21,6 @@ import { useConnectorDialog } from "./connector-popup/hooks/use-connector-dialog
import { useIndexingConnectors } from "./connector-popup/hooks/use-indexing-connectors";
import { ActiveConnectorsTab } from "./connector-popup/tabs/active-connectors-tab";
import { AllConnectorsTab } from "./connector-popup/tabs/all-connectors-tab";
-import { ComposioToolkitView } from "./connector-popup/views/composio-toolkit-view";
import { ConnectorAccountsListView } from "./connector-popup/views/connector-accounts-list-view";
import { YouTubeCrawlerView } from "./connector-popup/views/youtube-crawler-view";
@@ -88,12 +87,6 @@ export const ConnectorIndicator: FC = () => {
setConnectorConfig,
setIndexingConnectorConfig,
setConnectorName,
- // Composio
- viewingComposio,
- connectingComposioToolkit,
- handleOpenComposio,
- handleBackFromComposio,
- handleConnectComposioToolkit,
} = useConnectorDialog();
// Fetch connectors using Electric SQL + PGlite for real-time updates
@@ -142,7 +135,7 @@ export const ConnectorIndicator: FC = () => {
// Check which connectors are already connected
// Using Electric SQL + PGlite for real-time connector updates
- const connectedTypes = new Set(
+ const connectedTypes = new Set(
(connectors || []).map((c: SearchSourceConnector) => c.connector_type)
);
@@ -183,30 +176,6 @@ export const ConnectorIndicator: FC = () => {
{/* YouTube Crawler View - shown when adding YouTube videos */}
{isYouTubeView && searchSpaceId ? (
- ) : viewingComposio && searchSpaceId ? (
-
- c.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
- c.connector_type === "COMPOSIO_GMAIL_CONNECTOR" ||
- c.connector_type === "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
- )
- .map((c: SearchSourceConnector) => {
- // Map connector type back to toolkit_id
- if (c.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR") return "googledrive";
- if (c.connector_type === "COMPOSIO_GMAIL_CONNECTOR") return "gmail";
- if (c.connector_type === "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR") return "googlecalendar";
- return c.config?.toolkit_id as string;
- })
- .filter(Boolean)
- }
- onBack={handleBackFromComposio}
- onConnectToolkit={handleConnectComposioToolkit}
- isConnecting={connectingComposioToolkit !== null}
- connectingToolkitId={connectingComposioToolkit}
- />
) : viewingMCPList ? (
{
onCreateYouTubeCrawler={handleCreateYouTubeCrawler}
onManage={handleStartEdit}
onViewAccountsList={handleViewAccountsList}
- onOpenComposio={handleOpenComposio}
/>
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 4a177ac36..3ea1aab48 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -83,9 +83,6 @@ export const useConnectorDialog = () => {
// MCP list view state (for managing multiple MCP connectors)
const [viewingMCPList, setViewingMCPList] = useState(false);
- // Composio toolkit view state
- const [viewingComposio, setViewingComposio] = useState(false);
- const [connectingComposioToolkit, setConnectingComposioToolkit] = useState(null);
// Track if we came from accounts list when entering edit mode
const [cameFromAccountsList, setCameFromAccountsList] = useState<{
@@ -159,17 +156,6 @@ export const useConnectorDialog = () => {
setViewingMCPList(true);
}
- // Clear Composio view if view is not "composio" anymore
- if (params.view !== "composio" && viewingComposio) {
- setViewingComposio(false);
- setConnectingComposioToolkit(null);
- }
-
- // Handle Composio view
- if (params.view === "composio" && !viewingComposio) {
- setViewingComposio(true);
- }
-
// Handle connect view
if (params.view === "connect" && params.connectorType && !connectingConnectorType) {
setConnectingConnectorType(params.connectorType);
@@ -303,7 +289,6 @@ export const useConnectorDialog = () => {
connectingConnectorType,
viewingAccountsType,
viewingMCPList,
- viewingComposio,
]);
// Detect OAuth success / Failure and transition to config view
@@ -872,63 +857,6 @@ export const useConnectorDialog = () => {
router.replace(url.pathname + url.search, { scroll: false });
}, [router]);
- // Handle opening Composio toolkit view
- const handleOpenComposio = useCallback(() => {
- if (!searchSpaceId) return;
-
- setViewingComposio(true);
-
- // Update URL to show Composio view
- const url = new URL(window.location.href);
- url.searchParams.set("modal", "connectors");
- url.searchParams.set("view", "composio");
- router.replace(url.pathname + url.search, { scroll: false });
- }, [searchSpaceId, router]);
-
- // Handle going back from Composio view
- const handleBackFromComposio = useCallback(() => {
- setViewingComposio(false);
- setConnectingComposioToolkit(null);
- const url = new URL(window.location.href);
- url.searchParams.set("modal", "connectors");
- url.searchParams.delete("view");
- router.replace(url.pathname + url.search, { scroll: false });
- }, [router]);
-
- // Handle connecting a Composio toolkit
- const handleConnectComposioToolkit = useCallback(
- async (toolkitId: string) => {
- if (!searchSpaceId) return;
-
- setConnectingComposioToolkit(toolkitId);
-
- try {
- const response = await authenticatedFetch(
- `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/auth/composio/connector/add?space_id=${searchSpaceId}&toolkit_id=${toolkitId}`,
- { method: "GET" }
- );
-
- if (!response.ok) {
- throw new Error(`Failed to initiate Composio OAuth for ${toolkitId}`);
- }
-
- const data = await response.json();
-
- if (data.auth_url) {
- // Redirect to Composio OAuth
- window.location.href = data.auth_url;
- } else {
- throw new Error("No authorization URL received from Composio");
- }
- } catch (error) {
- console.error("Error connecting Composio toolkit:", error);
- toast.error(`Failed to connect ${toolkitId}. Please try again.`);
- setConnectingComposioToolkit(null);
- }
- },
- [searchSpaceId]
- );
-
// Handle starting indexing
const handleStartIndexing = useCallback(
async (refreshConnectors: () => void) => {
@@ -1589,8 +1517,6 @@ export const useConnectorDialog = () => {
allConnectors,
viewingAccountsType,
viewingMCPList,
- viewingComposio,
- connectingComposioToolkit,
// Setters
setSearchQuery,
@@ -1626,10 +1552,5 @@ export const useConnectorDialog = () => {
connectorConfig,
setConnectorConfig,
setIndexingConnectorConfig,
-
- // Composio
- handleOpenComposio,
- handleBackFromComposio,
- handleConnectComposioToolkit,
};
};
diff --git a/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx b/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
index 4a0680200..ffe879d5d 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
@@ -82,7 +82,7 @@ export const AllConnectorsTab: FC = ({
{filteredComposio.length > 0 && (
-
Managed OAuth
+ Managed OAuth (Composio)
{filteredComposio.map((connector) => {
diff --git a/surfsense_web/components/assistant-ui/connector-popup/views/composio-toolkit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/views/composio-toolkit-view.tsx
deleted file mode 100644
index 456835597..000000000
--- a/surfsense_web/components/assistant-ui/connector-popup/views/composio-toolkit-view.tsx
+++ /dev/null
@@ -1,301 +0,0 @@
-"use client";
-
-import {
- ArrowLeft,
- Calendar,
- Check,
- ExternalLink,
- Github,
- Loader2,
- Mail,
- HardDrive,
- MessageSquare,
- FileText,
- Zap,
-} from "lucide-react";
-import Image from "next/image";
-import type { FC } from "react";
-import { useState } from "react";
-import { Badge } from "@/components/ui/badge";
-import { Button } from "@/components/ui/button";
-import { cn } from "@/lib/utils";
-
-interface ComposioToolkit {
- id: string;
- name: string;
- description: string;
- isIndexable: boolean;
-}
-
-interface ComposioToolkitViewProps {
- searchSpaceId: string;
- connectedToolkits: string[];
- onBack: () => void;
- onConnectToolkit: (toolkitId: string) => void;
- isConnecting: boolean;
- connectingToolkitId: string | null;
-}
-
-// Available Composio toolkits
-const COMPOSIO_TOOLKITS: ComposioToolkit[] = [
- {
- id: "googledrive",
- name: "Google Drive",
- description: "Search your Drive files and documents",
- isIndexable: true,
- },
- {
- id: "gmail",
- name: "Gmail",
- description: "Search through your emails",
- isIndexable: true,
- },
- {
- id: "googlecalendar",
- name: "Google Calendar",
- description: "Search through your events",
- isIndexable: true,
- },
- {
- id: "slack",
- name: "Slack",
- description: "Search Slack messages",
- isIndexable: false,
- },
- {
- id: "notion",
- name: "Notion",
- description: "Search Notion pages",
- isIndexable: false,
- },
- {
- id: "github",
- name: "GitHub",
- description: "Search repositories and code",
- isIndexable: false,
- },
-];
-
-// Get icon for toolkit
-const getToolkitIcon = (toolkitId: string, className?: string) => {
- const iconClass = className || "size-5";
-
- switch (toolkitId) {
- case "googledrive":
- return
;
- case "gmail":
- return
;
- case "googlecalendar":
- return
;
- case "slack":
- return
;
- case "notion":
- return
;
- case "github":
- return
;
- default:
- return
;
- }
-};
-
-export const ComposioToolkitView: FC
= ({
- searchSpaceId,
- connectedToolkits,
- onBack,
- onConnectToolkit,
- isConnecting,
- connectingToolkitId,
-}) => {
- const [hoveredToolkit, setHoveredToolkit] = useState(null);
-
- // Separate indexable and non-indexable toolkits
- const indexableToolkits = COMPOSIO_TOOLKITS.filter((t) => t.isIndexable);
- const nonIndexableToolkits = COMPOSIO_TOOLKITS.filter((t) => !t.isIndexable);
-
- return (
-
- {/* Header */}
-
- {/* Back button */}
-
-
- Back to connectors
-
-
- {/* Header content */}
-
-
-
-
-
-
-
- Composio
-
-
- Connect 100+ apps with managed OAuth - no verification needed
-
-
-
-
- Powered by Composio
-
-
-
-
-
- {/* Content */}
-
- {/* Indexable Toolkits (Google Services) */}
-
-
-
Google Services
-
- Indexable
-
-
-
- Connect Google services via Composio's verified OAuth app. Your data will be indexed and searchable.
-
-
- {indexableToolkits.map((toolkit) => {
- const isConnected = connectedToolkits.includes(toolkit.id);
- const isThisConnecting = connectingToolkitId === toolkit.id;
-
- return (
-
setHoveredToolkit(toolkit.id)}
- onMouseLeave={() => setHoveredToolkit(null)}
- className={cn(
- "group relative flex flex-col p-4 rounded-xl border transition-all duration-200",
- isConnected
- ? "border-emerald-500/30 bg-emerald-500/5"
- : "border-border bg-card hover:border-violet-500/30 hover:bg-violet-500/5"
- )}
- >
-
-
- {getToolkitIcon(toolkit.id, "size-5")}
-
- {isConnected && (
-
-
- Connected
-
- )}
-
-
{toolkit.name}
-
- {toolkit.description}
-
-
onConnectToolkit(toolkit.id)}
- disabled={isConnecting || isConnected}
- >
- {isThisConnecting ? (
- <>
-
- Connecting...
- >
- ) : isConnected ? (
- "Connected"
- ) : (
- "Connect"
- )}
-
-
- );
- })}
-
-
-
- {/* Non-Indexable Toolkits (Coming Soon) */}
-
-
-
More Integrations
-
- Coming Soon
-
-
-
- Connect these services for future indexing support. Currently available for connection only.
-
-
- {nonIndexableToolkits.map((toolkit) => (
-
-
-
- {getToolkitIcon(toolkit.id, "size-5")}
-
-
- Soon
-
-
-
{toolkit.name}
-
- {toolkit.description}
-
-
- Coming Soon
-
-
- ))}
-
-
-
- {/* Info footer */}
-
-
-
-
-
-
-
Why use Composio?
-
- Composio provides pre-verified OAuth apps, so you don't need to wait for Google app verification.
- Your data is securely processed through Composio's managed authentication.
-
-
-
-
-
-
- );
-};
From 4cbf80d73a74170a532cf1b531d7a9d670cc4663 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 04:44:37 +0530
Subject: [PATCH 03/51] feat: enhance Composio integration with pagination and
improved error handling
- Updated the list_gmail_messages method to support pagination with page tokens, allowing for more efficient message retrieval.
- Modified the return structure to include next_page_token and result_size_estimate for better client-side handling.
- Improved error handling and logging throughout the Gmail indexing process, ensuring better visibility into failures.
- Implemented batch processing for Gmail messages, committing changes incrementally to prevent data loss.
- Ensured consistent timestamp updates for connectors, even when no documents are indexed, to maintain accurate UI states.
- Refactored the indexing logic to streamline message processing and enhance overall performance.
---
.../app/connectors/composio_connector.py | 15 +-
.../routes/search_source_connectors_routes.py | 16 +-
.../app/services/composio_service.py | 54 +-
.../app/tasks/composio_indexer.py | 579 ++++++++++++------
4 files changed, 451 insertions(+), 213 deletions(-)
diff --git a/surfsense_backend/app/connectors/composio_connector.py b/surfsense_backend/app/connectors/composio_connector.py
index 18fd9564c..21e339d12 100644
--- a/surfsense_backend/app/connectors/composio_connector.py
+++ b/surfsense_backend/app/connectors/composio_connector.py
@@ -151,21 +151,23 @@ class ComposioConnector:
async def list_gmail_messages(
self,
query: str = "",
- max_results: int = 100,
- ) -> tuple[list[dict[str, Any]], str | None]:
+ max_results: int = 50,
+ page_token: str | None = None,
+ ) -> tuple[list[dict[str, Any]], str | None, int | None, str | None]:
"""
- List Gmail messages via Composio.
+ List Gmail messages via Composio with pagination support.
Args:
query: Gmail search query.
- max_results: Maximum number of messages.
+ max_results: Maximum number of messages per page (default: 50).
+ page_token: Optional pagination token for next page.
Returns:
- Tuple of (messages list, error message).
+ Tuple of (messages list, next_page_token, result_size_estimate, error message).
"""
connected_account_id = await self.get_connected_account_id()
if not connected_account_id:
- return [], "No connected account ID found"
+ return [], None, None, "No connected account ID found"
entity_id = await self.get_entity_id()
service = await self._get_service()
@@ -174,6 +176,7 @@ class ComposioConnector:
entity_id=entity_id,
query=query,
max_results=max_results,
+ page_token=page_token,
)
async def get_gmail_message_detail(
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 9ad03fba8..1578ad0d5 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -957,7 +957,7 @@ async def _update_connector_timestamp_by_id(session: AsyncSession, connector_id:
connector = result.scalars().first()
if connector:
- connector.last_indexed_at = datetime.now()
+ connector.last_indexed_at = datetime.now(UTC) # Use UTC for timezone consistency
await session.commit()
logger.info(f"Updated last_indexed_at for connector {connector_id}")
except Exception as e:
@@ -1097,18 +1097,22 @@ async def _run_indexing_with_notifications(
)
await update_timestamp_func(session, connector_id)
+ await session.commit() # Commit timestamp update
logger.info(
f"Indexing completed successfully: {documents_processed} documents processed"
)
# Update notification on success
if notification:
+ # Refresh notification to ensure it's not stale after timestamp update commit
+ await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=documents_processed,
error_message=None,
)
+ await session.commit() # Commit to ensure Electric SQL syncs the notification update
elif documents_processed > 0:
# Update notification to storing stage
if notification:
@@ -1124,24 +1128,30 @@ async def _run_indexing_with_notifications(
f"Indexing completed successfully: {documents_processed} documents processed"
)
if notification:
+ # Refresh notification to ensure it's not stale after indexing function commits
+ await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=documents_processed,
error_message=None,
)
+ await session.commit() # Commit to ensure Electric SQL syncs the notification update
else:
# No new documents processed - check if this is an error or just no changes
if error_or_warning:
# Actual failure
logger.error(f"Indexing failed: {error_or_warning}")
if notification:
+ # Refresh notification to ensure it's not stale after indexing function commits
+ await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=0,
error_message=error_or_warning,
)
+ await session.commit() # Commit to ensure Electric SQL syncs the notification update
else:
# Success - just no new documents to index (all skipped/unchanged)
logger.info(
@@ -1150,13 +1160,17 @@ async def _run_indexing_with_notifications(
# Still update timestamp so ElectricSQL syncs and clears "Syncing" UI
if update_timestamp_func:
await update_timestamp_func(session, connector_id)
+ await session.commit() # Commit timestamp update
if notification:
+ # Refresh notification to ensure it's not stale after timestamp update commit
+ await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=0,
error_message=None, # No error - sync succeeded
)
+ await session.commit() # Commit to ensure Electric SQL syncs the notification update
except Exception as e:
logger.error(f"Error in indexing task: {e!s}", exc_info=True)
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index 17fbd64e0..e32cbf8a0 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -256,7 +256,6 @@ class ComposioService:
"user_id": getattr(acc, "user_id", None),
})
- logger.info(f"DEBUG: Found {len(result)} TOTAL connections in Composio")
return result
except Exception as e:
logger.error(f"Failed to list all connections: {e!s}")
@@ -273,7 +272,6 @@ class ComposioService:
List of connected account details.
"""
try:
- logger.info(f"DEBUG: Calling connected_accounts.list(user_id='{user_id}')")
accounts_response = self.client.connected_accounts.list(user_id=user_id)
# Handle paginated response (may have .items attribute) or direct list
@@ -358,7 +356,6 @@ class ComposioService:
# - connected_account_id: for authentication
# - user_id: user identifier (SDK uses user_id, not entity_id)
# - dangerously_skip_version_check: skip version check for manual execution
- logger.info(f"DEBUG: Executing tool {tool_name} with params: {params}")
result = self.client.tools.execute(
slug=tool_name,
connected_account_id=connected_account_id,
@@ -366,8 +363,6 @@ class ComposioService:
arguments=params or {},
dangerously_skip_version_check=True,
)
- logger.info(f"DEBUG: Tool {tool_name} raw result type: {type(result)}")
- logger.info(f"DEBUG: Tool {tool_name} raw result: {result}")
return {"success": True, "data": result}
except Exception as e:
logger.error(f"Failed to execute tool {tool_name}: {e!s}")
@@ -417,7 +412,6 @@ class ComposioService:
return [], None, result.get("error", "Unknown error")
data = result.get("data", {})
- logger.info(f"DEBUG: Drive data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}")
# Handle nested response structure from Composio
files = []
@@ -429,7 +423,6 @@ class ComposioService:
elif isinstance(data, list):
files = data
- logger.info(f"DEBUG: Extracted {len(files)} drive files")
return files, next_token, None
except Exception as e:
@@ -478,25 +471,30 @@ class ComposioService:
connected_account_id: str,
entity_id: str,
query: str = "",
- max_results: int = 100,
- ) -> tuple[list[dict[str, Any]], str | None]:
+ max_results: int = 50,
+ page_token: str | None = None,
+ ) -> tuple[list[dict[str, Any]], str | None, int | None, str | None]:
"""
- List Gmail messages via Composio.
+ List Gmail messages via Composio with pagination support.
Args:
connected_account_id: Composio connected account ID.
entity_id: The entity/user ID that owns the connected account.
query: Gmail search query.
- max_results: Maximum number of messages to return.
+ max_results: Maximum number of messages to return per page (default: 50 to avoid payload size issues).
+ page_token: Optional pagination token for next page.
Returns:
- Tuple of (messages list, error message).
+ Tuple of (messages list, next_page_token, result_size_estimate, error message).
"""
try:
- # Composio uses snake_case for parameters, max is 500
- params = {"max_results": min(max_results, 500)}
+ # Use smaller batch size to avoid 413 payload too large errors
+ # Composio uses snake_case for parameters
+ params = {"max_results": min(max_results, 50)} # Reduced from 500 to 50
if query:
params["query"] = query # Composio uses 'query' not 'q'
+ if page_token:
+ params["page_token"] = page_token
result = await self.execute_tool(
connected_account_id=connected_account_id,
@@ -506,25 +504,38 @@ class ComposioService:
)
if not result.get("success"):
- return [], result.get("error", "Unknown error")
+ return [], None, None, result.get("error", "Unknown error")
data = result.get("data", {})
- logger.info(f"DEBUG: Gmail data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}")
- logger.info(f"DEBUG: Gmail full data: {data}")
# Try different possible response structures
messages = []
+ next_token = None
+ result_size_estimate = None
if isinstance(data, dict):
messages = data.get("messages", []) or data.get("data", {}).get("messages", []) or data.get("emails", [])
+ # Check for pagination token in various possible locations
+ next_token = (
+ data.get("nextPageToken")
+ or data.get("next_page_token")
+ or data.get("data", {}).get("nextPageToken")
+ or data.get("data", {}).get("next_page_token")
+ )
+ # Extract resultSizeEstimate if available (Gmail API provides this)
+ result_size_estimate = (
+ data.get("resultSizeEstimate")
+ or data.get("result_size_estimate")
+ or data.get("data", {}).get("resultSizeEstimate")
+ or data.get("data", {}).get("result_size_estimate")
+ )
elif isinstance(data, list):
messages = data
- logger.info(f"DEBUG: Extracted {len(messages)} messages")
- return messages, None
+ return messages, next_token, result_size_estimate, None
except Exception as e:
logger.error(f"Failed to list Gmail messages: {e!s}")
- return [], str(e)
+ return [], None, None, str(e)
async def get_gmail_message_detail(
self, connected_account_id: str, entity_id: str, message_id: str
@@ -603,8 +614,6 @@ class ComposioService:
return [], result.get("error", "Unknown error")
data = result.get("data", {})
- logger.info(f"DEBUG: Calendar data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}")
- logger.info(f"DEBUG: Calendar full data: {data}")
# Try different possible response structures
events = []
@@ -613,7 +622,6 @@ class ComposioService:
elif isinstance(data, list):
events = data
- logger.info(f"DEBUG: Extracted {len(events)} calendar events")
return events, None
except Exception as e:
diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py
index 8762561ee..c9cd74234 100644
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@@ -9,6 +9,7 @@ to avoid circular import issues with the connector_indexers package.
import logging
from datetime import UTC, datetime
+from typing import Any
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
@@ -26,6 +27,7 @@ from app.db import (
from app.services.composio_service import INDEXABLE_TOOLKITS, TOOLKIT_TO_DOCUMENT_TYPE
from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
+from app.tasks.connector_indexers.base import calculate_date_range
from app.utils.document_converters import (
create_document_chunks,
generate_content_hash,
@@ -75,7 +77,7 @@ async def update_connector_last_indexed(
) -> None:
"""Update the last_indexed_at timestamp for a connector."""
if update_last_indexed:
- connector.last_indexed_at = datetime.now()
+ connector.last_indexed_at = datetime.now(UTC) # Use UTC for timezone consistency
logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
@@ -287,6 +289,9 @@ async def _index_composio_google_drive(
await task_logger.log_task_success(
log_entry, success_msg, {"files_count": 0}
)
+ # CRITICAL: Update timestamp even when no files found so Electric SQL syncs and UI shows indexed status
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ await session.commit()
return 0, None # Return None (not error) when no items found - this is success with 0 items
logger.info(f"Found {len(all_files)} Google Drive files to index via Composio")
@@ -380,6 +385,13 @@ async def _index_composio_google_drive(
existing_document.updated_at = get_current_timestamp()
documents_indexed += 1
+
+ # Batch commit every 10 documents
+ if documents_indexed % 10 == 0:
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Drive files processed so far"
+ )
+ await session.commit()
continue
# Create new document
@@ -425,7 +437,11 @@ async def _index_composio_google_drive(
session.add(document)
documents_indexed += 1
+ # Batch commit every 10 documents
if documents_indexed % 10 == 0:
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Drive files processed so far"
+ )
await session.commit()
except Exception as e:
@@ -433,10 +449,19 @@ async def _index_composio_google_drive(
documents_skipped += 1
continue
- if documents_indexed > 0:
- await update_connector_last_indexed(session, connector, update_last_indexed)
+ # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+ # This ensures the UI shows "Last indexed" instead of "Never indexed"
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ # Final commit to ensure all documents are persisted (safety net)
+ # This matches the pattern used in non-Composio Gmail indexer
+ logger.info(
+ f"Final commit: Total {documents_indexed} Google Drive files processed"
+ )
await session.commit()
+ logger.info(
+ "Successfully committed all Composio Google Drive document changes to database"
+ )
await task_logger.log_task_success(
log_entry,
@@ -454,154 +479,89 @@ async def _index_composio_google_drive(
return 0, f"Failed to index Google Drive via Composio: {e!s}"
-async def _index_composio_gmail(
+async def _process_gmail_message_batch(
session: AsyncSession,
- connector,
+ messages: list[dict[str, Any]],
+ composio_connector: ComposioConnector,
connector_id: int,
search_space_id: int,
user_id: str,
- start_date: str | None,
- end_date: str | None,
- task_logger: TaskLoggingService,
- log_entry,
- update_last_indexed: bool = True,
- max_items: int = 1000,
-) -> tuple[int, str]:
- """Index Gmail messages via Composio."""
- try:
- composio_connector = ComposioConnector(session, connector_id)
+ total_documents_indexed: int = 0,
+) -> tuple[int, int]:
+ """
+ Process a batch of Gmail messages and index them.
+
+ Args:
+ total_documents_indexed: Running total of documents indexed so far (for batch commits).
+
+ Returns:
+ Tuple of (documents_indexed, documents_skipped)
+ """
+ documents_indexed = 0
+ documents_skipped = 0
- await task_logger.log_task_progress(
- log_entry,
- f"Fetching Gmail messages via Composio for connector {connector_id}",
- {"stage": "fetching_messages"},
- )
+ for message in messages:
+ try:
+ # Composio uses 'messageId' (camelCase), not 'id'
+ message_id = message.get("messageId", "") or message.get("id", "")
+ if not message_id:
+ documents_skipped += 1
+ continue
- # Build query with date range
- query_parts = []
- if start_date:
- query_parts.append(f"after:{start_date.replace('-', '/')}")
- if end_date:
- query_parts.append(f"before:{end_date.replace('-', '/')}")
- query = " ".join(query_parts)
+ # Composio's GMAIL_FETCH_EMAILS already returns full message content
+ # No need for a separate detail API call
- messages, error = await composio_connector.list_gmail_messages(
- query=query,
- max_results=max_items,
- )
+ # Extract message info from Composio response
+ # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
+ payload = message.get("payload", {})
+ headers = payload.get("headers", [])
- if error:
- await task_logger.log_task_failure(
- log_entry, f"Failed to fetch Gmail messages: {error}", {}
+ subject = "No Subject"
+ sender = "Unknown Sender"
+ date_str = message.get("messageTimestamp", "Unknown Date")
+
+ for header in headers:
+ name = header.get("name", "").lower()
+ value = header.get("value", "")
+ if name == "subject":
+ subject = value
+ elif name == "from":
+ sender = value
+ elif name == "date":
+ date_str = value
+
+ # Format to markdown using the full message data
+ markdown_content = composio_connector.format_gmail_message_to_markdown(message)
+
+ # Check for empty content (defensive parsing per Composio best practices)
+ if not markdown_content.strip():
+ logger.warning(f"Skipping Gmail message with no content: {subject}")
+ documents_skipped += 1
+ continue
+
+ # Generate unique identifier
+ document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"])
+ unique_identifier_hash = generate_unique_identifier_hash(
+ document_type, f"gmail_{message_id}", search_space_id
)
- return 0, f"Failed to fetch Gmail messages: {error}"
- if not messages:
- success_msg = "No Gmail messages found in the specified date range"
- await task_logger.log_task_success(
- log_entry, success_msg, {"messages_count": 0}
+ content_hash = generate_content_hash(markdown_content, search_space_id)
+
+ existing_document = await check_document_by_unique_identifier(
+ session, unique_identifier_hash
)
- return 0, None # Return None (not error) when no items found - this is success with 0 items
- logger.info(f"Found {len(messages)} Gmail messages to index via Composio")
+ # Get label IDs from Composio response
+ label_ids = message.get("labelIds", [])
+ # Extract thread_id if available (for consistency with non-Composio implementation)
+ thread_id = message.get("threadId", "") or message.get("thread_id", "")
- documents_indexed = 0
- documents_skipped = 0
-
- for message in messages:
- try:
- # Composio uses 'messageId' (camelCase), not 'id'
- message_id = message.get("messageId", "") or message.get("id", "")
- if not message_id:
+ if existing_document:
+ if existing_document.content_hash == content_hash:
documents_skipped += 1
continue
- # Composio's GMAIL_FETCH_EMAILS already returns full message content
- # No need for a separate detail API call
-
- # Extract message info from Composio response
- # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
- payload = message.get("payload", {})
- headers = payload.get("headers", [])
-
- subject = "No Subject"
- sender = "Unknown Sender"
- date_str = message.get("messageTimestamp", "Unknown Date")
-
- for header in headers:
- name = header.get("name", "").lower()
- value = header.get("value", "")
- if name == "subject":
- subject = value
- elif name == "from":
- sender = value
- elif name == "date":
- date_str = value
-
- # Format to markdown using the full message data
- markdown_content = composio_connector.format_gmail_message_to_markdown(message)
-
- # Generate unique identifier
- document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"])
- unique_identifier_hash = generate_unique_identifier_hash(
- document_type, f"gmail_{message_id}", search_space_id
- )
-
- content_hash = generate_content_hash(markdown_content, search_space_id)
-
- existing_document = await check_document_by_unique_identifier(
- session, unique_identifier_hash
- )
-
- # Get label IDs from Composio response
- label_ids = message.get("labelIds", [])
-
- if existing_document:
- if existing_document.content_hash == content_hash:
- documents_skipped += 1
- continue
-
- # Update existing
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "message_id": message_id,
- "subject": subject,
- "sender": sender,
- "document_type": "Gmail Message (Composio)",
- }
- summary_content, summary_embedding = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
- summary_embedding = config.embedding_model_instance.embed(summary_content)
-
- chunks = await create_document_chunks(markdown_content)
-
- existing_document.title = f"Gmail: {subject}"
- existing_document.content = summary_content
- existing_document.content_hash = content_hash
- existing_document.embedding = summary_embedding
- existing_document.document_metadata = {
- "message_id": message_id,
- "subject": subject,
- "sender": sender,
- "date": date_str,
- "labels": label_ids,
- "connector_id": connector_id,
- "source": "composio",
- }
- existing_document.chunks = chunks
- existing_document.updated_at = get_current_timestamp()
-
- documents_indexed += 1
- continue
-
- # Create new document
+ # Update existing
user_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
@@ -609,6 +569,7 @@ async def _index_composio_gmail(
if user_llm:
document_metadata = {
"message_id": message_id,
+ "thread_id": thread_id,
"subject": subject,
"sender": sender,
"document_type": "Gmail Message (Composio)",
@@ -622,53 +583,276 @@ async def _index_composio_gmail(
chunks = await create_document_chunks(markdown_content)
- document = Document(
- search_space_id=search_space_id,
- title=f"Gmail: {subject}",
- document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
- document_metadata={
- "message_id": message_id,
- "subject": subject,
- "sender": sender,
- "date": date_str,
- "labels": label_ids,
- "connector_id": connector_id,
- "toolkit_id": "gmail",
- "source": "composio",
- },
- content=summary_content,
- content_hash=content_hash,
- unique_identifier_hash=unique_identifier_hash,
- embedding=summary_embedding,
- chunks=chunks,
- updated_at=get_current_timestamp(),
- )
- session.add(document)
+ existing_document.title = f"Gmail: {subject}"
+ existing_document.content = summary_content
+ existing_document.content_hash = content_hash
+ existing_document.embedding = summary_embedding
+ existing_document.document_metadata = {
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "date": date_str,
+ "labels": label_ids,
+ "connector_id": connector_id,
+ "source": "composio",
+ }
+ existing_document.chunks = chunks
+ existing_document.updated_at = get_current_timestamp()
+
documents_indexed += 1
-
- if documents_indexed % 10 == 0:
+
+ # Batch commit every 10 documents
+ current_total = total_documents_indexed + documents_indexed
+ if current_total % 10 == 0:
+ logger.info(
+ f"Committing batch: {current_total} Gmail messages processed so far"
+ )
await session.commit()
-
- except Exception as e:
- logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
- documents_skipped += 1
continue
- if documents_indexed > 0:
- await update_connector_last_indexed(session, connector, update_last_indexed)
+ # Create new document
+ user_llm = await get_user_long_context_llm(
+ session, user_id, search_space_id
+ )
+ if user_llm:
+ document_metadata = {
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "document_type": "Gmail Message (Composio)",
+ }
+ summary_content, summary_embedding = await generate_document_summary(
+ markdown_content, user_llm, document_metadata
+ )
+ else:
+ summary_content = f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
+ summary_embedding = config.embedding_model_instance.embed(summary_content)
+
+ chunks = await create_document_chunks(markdown_content)
+
+ document = Document(
+ search_space_id=search_space_id,
+ title=f"Gmail: {subject}",
+ document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
+ document_metadata={
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "date": date_str,
+ "labels": label_ids,
+ "connector_id": connector_id,
+ "toolkit_id": "gmail",
+ "source": "composio",
+ },
+ content=summary_content,
+ content_hash=content_hash,
+ unique_identifier_hash=unique_identifier_hash,
+ embedding=summary_embedding,
+ chunks=chunks,
+ updated_at=get_current_timestamp(),
+ )
+ session.add(document)
+ documents_indexed += 1
+
+ # Batch commit every 10 documents
+ current_total = total_documents_indexed + documents_indexed
+ if current_total % 10 == 0:
+ logger.info(
+ f"Committing batch: {current_total} Gmail messages processed so far"
+ )
+ await session.commit()
+
+ except Exception as e:
+ logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
+ documents_skipped += 1
+ # Rollback on error to avoid partial state (per Composio best practices)
+ try:
+ await session.rollback()
+ except Exception as rollback_error:
+ logger.error(f"Error during rollback: {rollback_error!s}", exc_info=True)
+ continue
+
+ return documents_indexed, documents_skipped
+
+
+async def _index_composio_gmail(
+ session: AsyncSession,
+ connector,
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ start_date: str | None,
+ end_date: str | None,
+ task_logger: TaskLoggingService,
+ log_entry,
+ update_last_indexed: bool = True,
+ max_items: int = 1000,
+) -> tuple[int, str]:
+ """Index Gmail messages via Composio with pagination and incremental processing."""
+ try:
+ composio_connector = ComposioConnector(session, connector_id)
+
+ # Normalize date values - handle "undefined" strings from frontend
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
+
+ # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+ # This ensures indexing works even when user doesn't specify dates
+ start_date_str, end_date_str = calculate_date_range(
+ connector, start_date, end_date, default_days_back=365
+ )
+
+ # Build query with date range
+ query_parts = []
+ if start_date_str:
+ query_parts.append(f"after:{start_date_str.replace('-', '/')}")
+ if end_date_str:
+ query_parts.append(f"before:{end_date_str.replace('-', '/')}")
+ query = " ".join(query_parts) if query_parts else ""
+
+ logger.info(
+ f"Gmail query for connector {connector_id}: '{query}' "
+ f"(start_date={start_date_str}, end_date={end_date_str})"
+ )
+
+ # Use smaller batch size to avoid 413 payload too large errors
+ batch_size = 50
+ page_token = None
+ total_documents_indexed = 0
+ total_documents_skipped = 0
+ total_messages_fetched = 0
+ result_size_estimate = None # Will be set from first API response
+
+ while total_messages_fetched < max_items:
+ # Calculate how many messages to fetch in this batch
+ remaining = max_items - total_messages_fetched
+ current_batch_size = min(batch_size, remaining)
+
+ # Use result_size_estimate if available, otherwise fall back to max_items
+ estimated_total = result_size_estimate if result_size_estimate is not None else max_items
+ # Cap estimated_total at max_items to avoid showing misleading progress
+ estimated_total = min(estimated_total, max_items)
+
+ await task_logger.log_task_progress(
+ log_entry,
+ f"Fetching Gmail messages batch via Composio for connector {connector_id} "
+ f"({total_messages_fetched}/{estimated_total} fetched, {total_documents_indexed} indexed)",
+ {
+ "stage": "fetching_messages",
+ "batch_size": current_batch_size,
+ "total_fetched": total_messages_fetched,
+ "total_indexed": total_documents_indexed,
+ "estimated_total": estimated_total,
+ },
+ )
+
+ # Fetch batch of messages
+ messages, next_token, result_size_estimate_batch, error = await composio_connector.list_gmail_messages(
+ query=query,
+ max_results=current_batch_size,
+ page_token=page_token,
+ )
+
+ if error:
+ await task_logger.log_task_failure(
+ log_entry, f"Failed to fetch Gmail messages: {error}", {}
+ )
+ return 0, f"Failed to fetch Gmail messages: {error}"
+
+ if not messages:
+ # No more messages available
+ break
+
+ # Update result_size_estimate from first response (Gmail provides this estimate)
+ if result_size_estimate is None and result_size_estimate_batch is not None:
+ result_size_estimate = result_size_estimate_batch
+ logger.info(f"Gmail API estimated {result_size_estimate} total messages for query: '{query}'")
+
+ total_messages_fetched += len(messages)
+ # Recalculate estimated_total after potentially updating result_size_estimate
+ estimated_total = result_size_estimate if result_size_estimate is not None else max_items
+ estimated_total = min(estimated_total, max_items)
+
+ logger.info(
+ f"Fetched batch of {len(messages)} Gmail messages "
+ f"(total: {total_messages_fetched}/{estimated_total})"
+ )
+
+ # Process batch incrementally
+ batch_indexed, batch_skipped = await _process_gmail_message_batch(
+ session=session,
+ messages=messages,
+ composio_connector=composio_connector,
+ connector_id=connector_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ total_documents_indexed=total_documents_indexed,
+ )
+
+ total_documents_indexed += batch_indexed
+ total_documents_skipped += batch_skipped
+
+ logger.info(
+ f"Processed batch: {batch_indexed} indexed, {batch_skipped} skipped "
+ f"(total: {total_documents_indexed} indexed, {total_documents_skipped} skipped)"
+ )
+
+ # Batch commits happen in _process_gmail_message_batch every 10 documents
+ # This ensures progress is saved incrementally, preventing data loss on crashes
+
+ # Check if we should continue
+ if not next_token:
+ # No more pages available
+ break
+
+ if len(messages) < current_batch_size:
+ # Last page had fewer items than requested, we're done
+ break
+
+ # Continue with next page
+ page_token = next_token
+
+ if total_messages_fetched == 0:
+ success_msg = "No Gmail messages found in the specified date range"
+ await task_logger.log_task_success(
+ log_entry, success_msg, {"messages_count": 0}
+ )
+ # CRITICAL: Update timestamp even when no messages found so Electric SQL syncs and UI shows indexed status
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ await session.commit()
+ return 0, None # Return None (not error) when no items found
+
+ # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+ # This ensures the UI shows "Last indexed" instead of "Never indexed"
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+
+ # Final commit to ensure all documents are persisted (safety net)
+ # This matches the pattern used in non-Composio Gmail indexer
+ logger.info(
+ f"Final commit: Total {total_documents_indexed} Gmail messages processed"
+ )
await session.commit()
+ logger.info(
+ "Successfully committed all Composio Gmail document changes to database"
+ )
await task_logger.log_task_success(
log_entry,
f"Successfully completed Gmail indexing via Composio for connector {connector_id}",
{
- "documents_indexed": documents_indexed,
- "documents_skipped": documents_skipped,
+ "documents_indexed": total_documents_indexed,
+ "documents_skipped": total_documents_skipped,
+ "messages_fetched": total_messages_fetched,
},
)
- return documents_indexed, None
+ return total_documents_indexed, None
except Exception as e:
logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
@@ -689,8 +873,6 @@ async def _index_composio_google_calendar(
max_items: int = 2500,
) -> tuple[int, str]:
"""Index Google Calendar events via Composio."""
- from datetime import datetime, timedelta
-
try:
composio_connector = ComposioConnector(session, connector_id)
@@ -700,18 +882,26 @@ async def _index_composio_google_calendar(
{"stage": "fetching_events"},
)
- # Build time range
- if start_date:
- time_min = f"{start_date}T00:00:00Z"
- else:
- # Default to 365 days ago
- default_start = datetime.now() - timedelta(days=365)
- time_min = default_start.strftime("%Y-%m-%dT00:00:00Z")
+ # Normalize date values - handle "undefined" strings from frontend
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
- if end_date:
- time_max = f"{end_date}T23:59:59Z"
- else:
- time_max = datetime.now().strftime("%Y-%m-%dT23:59:59Z")
+ # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+ # This ensures indexing works even when user doesn't specify dates
+ start_date_str, end_date_str = calculate_date_range(
+ connector, start_date, end_date, default_days_back=365
+ )
+
+ # Build time range for API call
+ time_min = f"{start_date_str}T00:00:00Z"
+ time_max = f"{end_date_str}T23:59:59Z"
+
+ logger.info(
+ f"Google Calendar query for connector {connector_id}: "
+ f"(start_date={start_date_str}, end_date={end_date_str})"
+ )
events, error = await composio_connector.list_calendar_events(
time_min=time_min,
@@ -730,6 +920,9 @@ async def _index_composio_google_calendar(
await task_logger.log_task_success(
log_entry, success_msg, {"events_count": 0}
)
+ # CRITICAL: Update timestamp even when no events found so Electric SQL syncs and UI shows indexed status
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ await session.commit()
return 0, None # Return None (not error) when no items found - this is success with 0 items
logger.info(f"Found {len(events)} Google Calendar events to index via Composio")
@@ -814,6 +1007,13 @@ async def _index_composio_google_calendar(
existing_document.updated_at = get_current_timestamp()
documents_indexed += 1
+
+ # Batch commit every 10 documents
+ if documents_indexed % 10 == 0:
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Calendar events processed so far"
+ )
+ await session.commit()
continue
# Create new document
@@ -863,7 +1063,11 @@ async def _index_composio_google_calendar(
session.add(document)
documents_indexed += 1
+ # Batch commit every 10 documents
if documents_indexed % 10 == 0:
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Calendar events processed so far"
+ )
await session.commit()
except Exception as e:
@@ -871,10 +1075,19 @@ async def _index_composio_google_calendar(
documents_skipped += 1
continue
- if documents_indexed > 0:
- await update_connector_last_indexed(session, connector, update_last_indexed)
+ # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+ # This ensures the UI shows "Last indexed" instead of "Never indexed"
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ # Final commit to ensure all documents are persisted (safety net)
+ # This matches the pattern used in non-Composio Gmail indexer
+ logger.info(
+ f"Final commit: Total {documents_indexed} Google Calendar events processed"
+ )
await session.commit()
+ logger.info(
+ "Successfully committed all Composio Google Calendar document changes to database"
+ )
await task_logger.log_task_success(
log_entry,
From e6a4ac7c9cd14c3bcae4bbeb91b7b58abd538b80 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 04:56:15 +0530
Subject: [PATCH 04/51] fix: change animation from spring to tween for sliding
---
.../components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx | 2 +-
.../components/layout/ui/sidebar/AllSharedChatsSidebar.tsx | 2 +-
surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx
index 39f1b95bc..c094ff44a 100644
--- a/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx
@@ -231,7 +231,7 @@ export function AllPrivateChatsSidebar({
initial={{ x: "-100%" }}
animate={{ x: 0 }}
exit={{ x: "-100%" }}
- transition={{ type: "spring", damping: 25, stiffness: 300 }}
+ transition={{ type: "tween", duration: 0.3, ease: "easeOut" }}
className="fixed inset-y-0 left-0 z-70 w-80 bg-background shadow-xl flex flex-col pointer-events-auto isolate"
role="dialog"
aria-modal="true"
diff --git a/surfsense_web/components/layout/ui/sidebar/AllSharedChatsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/AllSharedChatsSidebar.tsx
index 8dd593945..76dbf1aad 100644
--- a/surfsense_web/components/layout/ui/sidebar/AllSharedChatsSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/AllSharedChatsSidebar.tsx
@@ -231,7 +231,7 @@ export function AllSharedChatsSidebar({
initial={{ x: "-100%" }}
animate={{ x: 0 }}
exit={{ x: "-100%" }}
- transition={{ type: "spring", damping: 25, stiffness: 300 }}
+ transition={{ type: "tween", duration: 0.3, ease: "easeOut" }}
className="fixed inset-y-0 left-0 z-70 w-80 bg-background shadow-xl flex flex-col pointer-events-auto isolate"
role="dialog"
aria-modal="true"
diff --git a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
index 166d77eca..a3fd3ea14 100644
--- a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
@@ -446,7 +446,7 @@ export function InboxSidebar({
initial={{ x: "-100%" }}
animate={{ x: 0 }}
exit={{ x: "-100%" }}
- transition={{ type: "spring", damping: 25, stiffness: 300 }}
+ transition={{ type: "tween", duration: 0.3, ease: "easeOut" }}
className="fixed inset-y-0 left-0 z-70 w-90 bg-background shadow-xl flex flex-col pointer-events-auto isolate"
role="dialog"
aria-modal="true"
From 7ec7ed5c3b6dde85127e8809d7c07c47fe62fd87 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 05:17:28 +0530
Subject: [PATCH 05/51] feat: enhance Composio Google Drive integration with
folder and file selection
- Added a new endpoint to list folders and files in a user's Composio Google Drive, supporting hierarchical structure.
- Implemented UI components for selecting specific folders and files to index, improving user control over indexing options.
- Introduced indexing options for maximum files per folder and inclusion of subfolders, allowing for customizable indexing behavior.
- Enhanced error handling and logging for Composio Drive operations, ensuring better visibility into issues during file retrieval and indexing.
- Updated the Composio configuration component to reflect new selection capabilities and indexing options.
---
.../app/routes/composio_routes.py | 122 ++++++
.../routes/search_source_connectors_routes.py | 40 +-
.../app/services/composio_service.py | 6 +-
.../app/tasks/composio_indexer.py | 195 +++++++++-
.../components/composio-config.tsx | 294 +++++++++++++-
.../views/connector-edit-view.tsx | 7 +-
.../hooks/use-connector-dialog.ts | 8 +-
.../connectors/composio-drive-folder-tree.tsx | 365 ++++++++++++++++++
.../hooks/use-composio-drive-folders.ts | 29 ++
.../lib/apis/connectors-api.service.ts | 23 ++
surfsense_web/lib/query-client/cache-keys.ts | 4 +
11 files changed, 1069 insertions(+), 24 deletions(-)
create mode 100644 surfsense_web/components/connectors/composio-drive-folder-tree.tsx
create mode 100644 surfsense_web/hooks/use-composio-drive-folders.ts
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index 77891fc88..25e545dfb 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -8,6 +8,7 @@ Endpoints:
- GET /composio/toolkits - List available Composio toolkits
- GET /auth/composio/connector/add - Initiate OAuth for a specific toolkit
- GET /auth/composio/connector/callback - Handle OAuth callback
+- GET /connectors/{connector_id}/composio-drive/folders - List folders/files for Composio Google Drive
"""
import asyncio
@@ -369,3 +370,124 @@ async def composio_callback(
raise HTTPException(
status_code=500, detail=f"Failed to complete Composio OAuth: {e!s}"
) from e
+
+
+@router.get("/connectors/{connector_id}/composio-drive/folders")
+async def list_composio_drive_folders(
+ connector_id: int,
+ parent_id: str | None = None,
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+):
+ """
+ List folders AND files in user's Google Drive via Composio with hierarchical support.
+
+ This is called at index time from the manage connector page to display
+ the complete file system (folders and files). Only folders are selectable.
+
+ Args:
+ connector_id: ID of the Composio Google Drive connector
+ parent_id: Optional parent folder ID to list contents (None for root)
+
+ Returns:
+ JSON with list of items: {
+ "items": [
+ {"id": str, "name": str, "mimeType": str, "isFolder": bool, ...},
+ ...
+ ]
+ }
+ """
+ if not ComposioService.is_enabled():
+ raise HTTPException(
+ status_code=503,
+ detail="Composio integration is not enabled.",
+ )
+
+ try:
+ # Get connector and verify ownership
+ result = await session.execute(
+ select(SearchSourceConnector).filter(
+ SearchSourceConnector.id == connector_id,
+ SearchSourceConnector.user_id == user.id,
+ SearchSourceConnector.connector_type
+ == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+ )
+ )
+ connector = result.scalars().first()
+
+ if not connector:
+ raise HTTPException(
+ status_code=404,
+ detail="Composio Google Drive connector not found or access denied",
+ )
+
+ # Get Composio connected account ID from config
+ composio_connected_account_id = connector.config.get("composio_connected_account_id")
+ if not composio_connected_account_id:
+ raise HTTPException(
+ status_code=400,
+ detail="Composio connected account not found. Please reconnect the connector.",
+ )
+
+ # Initialize Composio service and fetch files
+ service = ComposioService()
+ entity_id = f"surfsense_{user.id}"
+
+ # Fetch files/folders from Composio Google Drive
+ files, next_token, error = await service.get_drive_files(
+ connected_account_id=composio_connected_account_id,
+ entity_id=entity_id,
+ folder_id=parent_id,
+ page_size=100,
+ )
+
+ if error:
+ logger.error(f"Failed to list Composio Drive files: {error}")
+ raise HTTPException(
+ status_code=500, detail=f"Failed to list folder contents: {error}"
+ )
+
+ # Transform files to match the expected format with isFolder field
+ items = []
+ for file_info in files:
+ file_id = file_info.get("id", "") or file_info.get("fileId", "")
+ file_name = file_info.get("name", "") or file_info.get("fileName", "") or "Untitled"
+ mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
+
+ if not file_id:
+ continue
+
+ is_folder = mime_type == "application/vnd.google-apps.folder"
+
+ items.append({
+ "id": file_id,
+ "name": file_name,
+ "mimeType": mime_type,
+ "isFolder": is_folder,
+ "parents": file_info.get("parents", []),
+ "size": file_info.get("size"),
+ "iconLink": file_info.get("iconLink"),
+ })
+
+ # Sort: folders first, then files, both alphabetically
+ folders = sorted([item for item in items if item["isFolder"]], key=lambda x: x["name"].lower())
+ files_list = sorted([item for item in items if not item["isFolder"]], key=lambda x: x["name"].lower())
+ items = folders + files_list
+
+ folder_count = len(folders)
+ file_count = len(files_list)
+
+ logger.info(
+ f"✅ Listed {len(items)} total items ({folder_count} folders, {file_count} files) for Composio connector {connector_id}"
+ + (f" in folder {parent_id}" if parent_id else " in ROOT")
+ )
+
+ return {"items": items}
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error listing Composio Drive contents: {e!s}", exc_info=True)
+ raise HTTPException(
+ status_code=500, detail=f"Failed to list Drive contents: {e!s}"
+ ) from e
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 1578ad0d5..89cdd9f95 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -897,8 +897,46 @@ async def index_connector_content(
)
response_message = "Web page indexing started in the background."
+ elif connector.connector_type == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR:
+ from app.tasks.celery_tasks.connector_tasks import (
+ index_composio_connector_task,
+ )
+
+ # For Composio Google Drive, if drive_items is provided, update connector config
+ # This allows the UI to pass folder/file selection like the regular Google Drive connector
+ if drive_items and drive_items.has_items():
+ # Update connector config with the selected folders/files
+ config = connector.config or {}
+ config["selected_folders"] = [{"id": f.id, "name": f.name} for f in drive_items.folders]
+ config["selected_files"] = [{"id": f.id, "name": f.name} for f in drive_items.files]
+ if drive_items.indexing_options:
+ config["indexing_options"] = {
+ "max_files_per_folder": drive_items.indexing_options.max_files_per_folder,
+ "incremental_sync": drive_items.indexing_options.incremental_sync,
+ "include_subfolders": drive_items.indexing_options.include_subfolders,
+ }
+ connector.config = config
+ from sqlalchemy.orm.attributes import flag_modified
+ flag_modified(connector, "config")
+ await session.commit()
+ await session.refresh(connector)
+
+ logger.info(
+ f"Triggering Composio Google Drive indexing for connector {connector_id} into search space {search_space_id}, "
+ f"folders: {len(drive_items.folders)}, files: {len(drive_items.files)}"
+ )
+ else:
+ logger.info(
+ f"Triggering Composio Google Drive indexing for connector {connector_id} into search space {search_space_id} "
+ f"using existing config (from {indexing_from} to {indexing_to})"
+ )
+
+ index_composio_connector_task.delay(
+ connector_id, search_space_id, str(user.id), indexing_from, indexing_to
+ )
+ response_message = "Composio Google Drive indexing started in the background."
+
elif connector.connector_type in [
- SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
]:
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index e32cbf8a0..5a6148533 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -397,7 +397,11 @@ class ComposioService:
"page_size": min(page_size, 100),
}
if folder_id:
- params["folder_id"] = folder_id
+ # List contents of a specific folder (exclude shortcuts - we don't have access to them)
+ params["q"] = f"'{folder_id}' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
+ else:
+ # List root-level items only (My Drive root), exclude shortcuts
+ params["q"] = "'root' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
if page_token:
params["page_token"] = page_token
diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py
index c9cd74234..f568d4134 100644
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@@ -252,37 +252,123 @@ async def _index_composio_google_drive(
update_last_indexed: bool = True,
max_items: int = 1000,
) -> tuple[int, str]:
- """Index Google Drive files via Composio."""
+ """Index Google Drive files via Composio.
+
+ Supports folder/file selection via connector config:
+ - selected_folders: List of {id, name} for folders to index
+ - selected_files: List of {id, name} for individual files to index
+ - indexing_options: {max_files_per_folder, incremental_sync, include_subfolders}
+ """
try:
composio_connector = ComposioConnector(session, connector_id)
+ connector_config = await composio_connector.get_config()
+
+ # Get folder/file selection configuration
+ selected_folders = connector_config.get("selected_folders", [])
+ selected_files = connector_config.get("selected_files", [])
+ indexing_options = connector_config.get("indexing_options", {})
+
+ max_files_per_folder = indexing_options.get("max_files_per_folder", 100)
+ include_subfolders = indexing_options.get("include_subfolders", True)
await task_logger.log_task_progress(
log_entry,
f"Fetching Google Drive files via Composio for connector {connector_id}",
- {"stage": "fetching_files"},
+ {"stage": "fetching_files", "selected_folders": len(selected_folders), "selected_files": len(selected_files)},
)
- # Fetch files
all_files = []
- page_token = None
- while len(all_files) < max_items:
- files, next_token, error = await composio_connector.list_drive_files(
- page_token=page_token,
- page_size=min(100, max_items - len(all_files)),
- )
+ # If specific folders/files are selected, fetch from those
+ if selected_folders or selected_files:
+ # Fetch files from selected folders
+ for folder in selected_folders:
+ folder_id = folder.get("id")
+ folder_name = folder.get("name", "Unknown")
+
+ if not folder_id:
+ continue
+
+ # Handle special case for "root" folder
+ actual_folder_id = None if folder_id == "root" else folder_id
+
+ logger.info(f"Fetching files from folder: {folder_name} ({folder_id})")
+
+ # Fetch files from this folder
+ folder_files = []
+ page_token = None
+
+ while len(folder_files) < max_files_per_folder:
+ files, next_token, error = await composio_connector.list_drive_files(
+ folder_id=actual_folder_id,
+ page_token=page_token,
+ page_size=min(100, max_files_per_folder - len(folder_files)),
+ )
- if error:
- await task_logger.log_task_failure(
- log_entry, f"Failed to fetch Drive files: {error}", {}
+ if error:
+ logger.warning(f"Failed to fetch files from folder {folder_name}: {error}")
+ break
+
+ # Process files
+ for file_info in files:
+ mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
+
+ # If it's a folder and include_subfolders is enabled, recursively fetch
+ if mime_type == "application/vnd.google-apps.folder":
+ if include_subfolders:
+ # Add subfolder files recursively
+ subfolder_files = await _fetch_folder_files_recursively(
+ composio_connector,
+ file_info.get("id"),
+ max_files=max_files_per_folder,
+ current_count=len(folder_files),
+ )
+ folder_files.extend(subfolder_files)
+ else:
+ folder_files.append(file_info)
+
+ if not next_token:
+ break
+ page_token = next_token
+
+ all_files.extend(folder_files[:max_files_per_folder])
+ logger.info(f"Found {len(folder_files)} files in folder {folder_name}")
+
+ # Add specifically selected files
+ for selected_file in selected_files:
+ file_id = selected_file.get("id")
+ file_name = selected_file.get("name", "Unknown")
+
+ if not file_id:
+ continue
+
+ # Add file info (we'll fetch content later during indexing)
+ all_files.append({
+ "id": file_id,
+ "name": file_name,
+ "mimeType": "", # Will be determined later
+ })
+ else:
+ # No selection specified - fetch all files (original behavior)
+ page_token = None
+
+ while len(all_files) < max_items:
+ files, next_token, error = await composio_connector.list_drive_files(
+ page_token=page_token,
+ page_size=min(100, max_items - len(all_files)),
)
- return 0, f"Failed to fetch Drive files: {error}"
- all_files.extend(files)
+ if error:
+ await task_logger.log_task_failure(
+ log_entry, f"Failed to fetch Drive files: {error}", {}
+ )
+ return 0, f"Failed to fetch Drive files: {error}"
- if not next_token:
- break
- page_token = next_token
+ all_files.extend(files)
+
+ if not next_token:
+ break
+ page_token = next_token
if not all_files:
success_msg = "No Google Drive files found"
@@ -479,6 +565,81 @@ async def _index_composio_google_drive(
return 0, f"Failed to index Google Drive via Composio: {e!s}"
+async def _fetch_folder_files_recursively(
+ composio_connector: ComposioConnector,
+ folder_id: str,
+ max_files: int = 100,
+ current_count: int = 0,
+ depth: int = 0,
+ max_depth: int = 10,
+) -> list[dict[str, Any]]:
+ """
+ Recursively fetch files from a Google Drive folder via Composio.
+
+ Args:
+ composio_connector: The Composio connector instance
+ folder_id: Google Drive folder ID
+ max_files: Maximum number of files to fetch
+ current_count: Current number of files already fetched
+ depth: Current recursion depth
+ max_depth: Maximum recursion depth to prevent infinite loops
+
+ Returns:
+ List of file info dictionaries
+ """
+ if depth >= max_depth:
+ logger.warning(f"Max recursion depth reached for folder {folder_id}")
+ return []
+
+ if current_count >= max_files:
+ return []
+
+ all_files = []
+ page_token = None
+
+ try:
+ while len(all_files) + current_count < max_files:
+ files, next_token, error = await composio_connector.list_drive_files(
+ folder_id=folder_id,
+ page_token=page_token,
+ page_size=min(100, max_files - len(all_files) - current_count),
+ )
+
+ if error:
+ logger.warning(f"Error fetching files from subfolder {folder_id}: {error}")
+ break
+
+ for file_info in files:
+ mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
+
+ if mime_type == "application/vnd.google-apps.folder":
+ # Recursively fetch from subfolders
+ subfolder_files = await _fetch_folder_files_recursively(
+ composio_connector,
+ file_info.get("id"),
+ max_files=max_files,
+ current_count=current_count + len(all_files),
+ depth=depth + 1,
+ max_depth=max_depth,
+ )
+ all_files.extend(subfolder_files)
+ else:
+ all_files.append(file_info)
+
+ if len(all_files) + current_count >= max_files:
+ break
+
+ if not next_token:
+ break
+ page_token = next_token
+
+ return all_files[:max_files - current_count]
+
+ except Exception as e:
+ logger.error(f"Error in recursive folder fetch: {e!s}")
+ return all_files
+
+
async def _process_gmail_message_batch(
session: AsyncSession,
messages: list[dict[str, Any]],
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
index a96f906fe..255d0cef4 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
@@ -1,7 +1,20 @@
"use client";
+import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation } from "lucide-react";
import type { FC } from "react";
+import { useEffect, useState } from "react";
+import { ComposioDriveFolderTree } from "@/components/connectors/composio-drive-folder-tree";
import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Label } from "@/components/ui/label";
+import {
+ Select,
+ SelectContent,
+ SelectItem,
+ SelectTrigger,
+ SelectValue,
+} from "@/components/ui/select";
+import { Switch } from "@/components/ui/switch";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
import { cn } from "@/lib/utils";
@@ -11,11 +24,134 @@ interface ComposioConfigProps {
onNameChange?: (name: string) => void;
}
-export const ComposioConfig: FC = ({ connector }) => {
+interface SelectedFolder {
+ id: string;
+ name: string;
+}
+
+interface IndexingOptions {
+ max_files_per_folder: number;
+ incremental_sync: boolean;
+ include_subfolders: boolean;
+}
+
+const DEFAULT_INDEXING_OPTIONS: IndexingOptions = {
+ max_files_per_folder: 100,
+ incremental_sync: true,
+ include_subfolders: true,
+};
+
+// Helper to get appropriate icon for file type based on file name
+function getFileIconFromName(fileName: string, className: string = "size-3.5 shrink-0") {
+ const lowerName = fileName.toLowerCase();
+ // Spreadsheets
+ if (
+ lowerName.endsWith(".xlsx") ||
+ lowerName.endsWith(".xls") ||
+ lowerName.endsWith(".csv") ||
+ lowerName.includes("spreadsheet")
+ ) {
+ return ;
+ }
+ // Presentations
+ if (
+ lowerName.endsWith(".pptx") ||
+ lowerName.endsWith(".ppt") ||
+ lowerName.includes("presentation")
+ ) {
+ return ;
+ }
+ // Documents (word, text only - not PDF)
+ if (
+ lowerName.endsWith(".docx") ||
+ lowerName.endsWith(".doc") ||
+ lowerName.endsWith(".txt") ||
+ lowerName.includes("document") ||
+ lowerName.includes("word") ||
+ lowerName.includes("text")
+ ) {
+ return ;
+ }
+ // Images
+ if (
+ lowerName.endsWith(".png") ||
+ lowerName.endsWith(".jpg") ||
+ lowerName.endsWith(".jpeg") ||
+ lowerName.endsWith(".gif") ||
+ lowerName.endsWith(".webp") ||
+ lowerName.endsWith(".svg")
+ ) {
+ return ;
+ }
+ // Default (including PDF)
+ return ;
+}
+
+export const ComposioConfig: FC = ({ connector, onConfigChange }) => {
const toolkitId = connector.config?.toolkit_id as string;
const isIndexable = connector.config?.is_indexable as boolean;
const composioAccountId = connector.config?.composio_connected_account_id as string;
+ // Check if this is a Google Drive Composio connector
+ const isGoogleDrive = toolkitId === "googledrive";
+
+ // Initialize with existing selected folders and files from connector config
+ const existingFolders =
+ (connector.config?.selected_folders as SelectedFolder[] | undefined) || [];
+ const existingFiles = (connector.config?.selected_files as SelectedFolder[] | undefined) || [];
+ const existingIndexingOptions =
+ (connector.config?.indexing_options as IndexingOptions | undefined) || DEFAULT_INDEXING_OPTIONS;
+
+ const [selectedFolders, setSelectedFolders] = useState(existingFolders);
+ const [selectedFiles, setSelectedFiles] = useState(existingFiles);
+ const [showFolderSelector, setShowFolderSelector] = useState(false);
+ const [indexingOptions, setIndexingOptions] = useState(existingIndexingOptions);
+
+ // Update selected folders and files when connector config changes
+ useEffect(() => {
+ const folders = (connector.config?.selected_folders as SelectedFolder[] | undefined) || [];
+ const files = (connector.config?.selected_files as SelectedFolder[] | undefined) || [];
+ const options =
+ (connector.config?.indexing_options as IndexingOptions | undefined) ||
+ DEFAULT_INDEXING_OPTIONS;
+ setSelectedFolders(folders);
+ setSelectedFiles(files);
+ setIndexingOptions(options);
+ }, [connector.config]);
+
+ const updateConfig = (
+ folders: SelectedFolder[],
+ files: SelectedFolder[],
+ options: IndexingOptions
+ ) => {
+ if (onConfigChange) {
+ onConfigChange({
+ ...connector.config,
+ selected_folders: folders,
+ selected_files: files,
+ indexing_options: options,
+ });
+ }
+ };
+
+ const handleSelectFolders = (folders: SelectedFolder[]) => {
+ setSelectedFolders(folders);
+ updateConfig(folders, selectedFiles, indexingOptions);
+ };
+
+ const handleSelectFiles = (files: SelectedFolder[]) => {
+ setSelectedFiles(files);
+ updateConfig(selectedFolders, files, indexingOptions);
+ };
+
+ const handleIndexingOptionChange = (key: keyof IndexingOptions, value: number | boolean) => {
+ const newOptions = { ...indexingOptions, [key]: value };
+ setIndexingOptions(newOptions);
+ updateConfig(selectedFolders, selectedFiles, newOptions);
+ };
+
+ const totalSelected = selectedFolders.length + selectedFiles.length;
+
return (
{/* Connection Details */}
@@ -52,6 +188,162 @@ export const ComposioConfig: FC = ({ connector }) => {
)}
+
+ {/* Google Drive specific: Folder & File Selection */}
+ {isGoogleDrive && isIndexable && (
+ <>
+
+
+
Folder & File Selection
+
+ Select specific folders and/or individual files to index.
+
+
+
+ {totalSelected > 0 && (
+
+
+ Selected {totalSelected} item{totalSelected > 1 ? "s" : ""}: {(() => {
+ const parts: string[] = [];
+ if (selectedFolders.length > 0) {
+ parts.push(
+ `${selectedFolders.length} folder${selectedFolders.length > 1 ? "s" : ""}`
+ );
+ }
+ if (selectedFiles.length > 0) {
+ parts.push(`${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`);
+ }
+ return parts.length > 0 ? `(${parts.join(" ")})` : "";
+ })()}
+
+
+ {selectedFolders.map((folder) => (
+
+
+ {folder.name}
+
+ ))}
+ {selectedFiles.map((file) => (
+
+ {getFileIconFromName(file.name)}
+ {file.name}
+
+ ))}
+
+
+ )}
+
+ {showFolderSelector ? (
+
+
+ setShowFolderSelector(false)}
+ className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20 hover:bg-slate-400/10 dark:hover:bg-white/10 text-xs sm:text-sm h-8 sm:h-9"
+ >
+ Done Selecting
+
+
+ ) : (
+
setShowFolderSelector(true)}
+ className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20 hover:bg-slate-400/10 dark:hover:bg-white/10 text-xs sm:text-sm h-8 sm:h-9"
+ >
+ {totalSelected > 0 ? "Change Selection" : "Select Folders & Files"}
+
+ )}
+
+
+ {/* Indexing Options */}
+
+
+
Indexing Options
+
+ Configure how files are indexed from your Google Drive.
+
+
+
+ {/* Max files per folder */}
+
+
+
+
+ Max files per folder
+
+
+ Maximum number of files to index from each folder
+
+
+
+ handleIndexingOptionChange("max_files_per_folder", parseInt(value, 10))
+ }
+ >
+
+
+
+
+
+ 50 files
+
+
+ 100 files
+
+
+ 250 files
+
+
+ 500 files
+
+
+ 1000 files
+
+
+
+
+
+
+ {/* Include subfolders toggle */}
+
+
+
+ Include subfolders
+
+
+ Recursively index files in subfolders of selected folders
+
+
+
handleIndexingOptionChange("include_subfolders", checked)}
+ />
+
+
+ >
+ )}
);
};
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index 66afd84a5..71258a519 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -224,8 +224,11 @@ export const ConnectorEditView: FC = ({
{/* Periodic sync - shown for all indexable connectors */}
{(() => {
- // Check if Google Drive has folders/files selected
+ // Check if Google Drive (regular or Composio) has folders/files selected
const isGoogleDrive = connector.connector_type === "GOOGLE_DRIVE_CONNECTOR";
+ const isComposioGoogleDrive =
+ connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR";
+ const requiresFolderSelection = isGoogleDrive || isComposioGoogleDrive;
const selectedFolders =
(connector.config?.selected_folders as
| Array<{ id: string; name: string }>
@@ -235,7 +238,7 @@ export const ConnectorEditView: FC = ({
| Array<{ id: string; name: string }>
| undefined) || [];
const hasItemsSelected = selectedFolders.length > 0 || selectedFiles.length > 0;
- const isDisabled = isGoogleDrive && !hasItemsSelected;
+ const isDisabled = requiresFolderSelection && !hasItemsSelected;
return (
{
return;
}
- // Prevent periodic indexing for Google Drive without folders/files selected
- if (periodicEnabled && editingConnector.connector_type === "GOOGLE_DRIVE_CONNECTOR") {
+ // Prevent periodic indexing for Google Drive (regular or Composio) without folders/files selected
+ if (
+ periodicEnabled &&
+ (editingConnector.connector_type === "GOOGLE_DRIVE_CONNECTOR" ||
+ editingConnector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR")
+ ) {
const selectedFolders = (connectorConfig || editingConnector.config)?.selected_folders as
| Array<{ id: string; name: string }>
| undefined;
diff --git a/surfsense_web/components/connectors/composio-drive-folder-tree.tsx b/surfsense_web/components/connectors/composio-drive-folder-tree.tsx
new file mode 100644
index 000000000..72c36edd5
--- /dev/null
+++ b/surfsense_web/components/connectors/composio-drive-folder-tree.tsx
@@ -0,0 +1,365 @@
+"use client";
+
+import {
+ ChevronDown,
+ ChevronRight,
+ File,
+ FileSpreadsheet,
+ FileText,
+ FolderClosed,
+ FolderOpen,
+ HardDrive,
+ Image,
+ Loader2,
+ Presentation,
+} from "lucide-react";
+import { useState } from "react";
+import { Checkbox } from "@/components/ui/checkbox";
+import { ScrollArea } from "@/components/ui/scroll-area";
+import { useComposioDriveFolders } from "@/hooks/use-composio-drive-folders";
+import { connectorsApiService } from "@/lib/apis/connectors-api.service";
+import { cn } from "@/lib/utils";
+
+interface DriveItem {
+ id: string;
+ name: string;
+ mimeType: string;
+ isFolder: boolean;
+ parents?: string[];
+ size?: number;
+ iconLink?: string;
+}
+
+interface ItemTreeNode {
+ item: DriveItem;
+ children: DriveItem[] | null; // null = not loaded, [] = loaded but empty
+ isExpanded: boolean;
+ isLoading: boolean;
+}
+
+interface SelectedFolder {
+ id: string;
+ name: string;
+}
+
+interface ComposioDriveFolderTreeProps {
+ connectorId: number;
+ selectedFolders: SelectedFolder[];
+ onSelectFolders: (folders: SelectedFolder[]) => void;
+ selectedFiles?: SelectedFolder[];
+ onSelectFiles?: (files: SelectedFolder[]) => void;
+}
+
+// Helper to get appropriate icon for file type
+function getFileIcon(mimeType: string, className: string = "h-4 w-4") {
+ if (mimeType.includes("spreadsheet") || mimeType.includes("excel")) {
+ return ;
+ }
+ if (mimeType.includes("presentation") || mimeType.includes("powerpoint")) {
+ return ;
+ }
+ if (mimeType.includes("document") || mimeType.includes("word") || mimeType.includes("text")) {
+ return ;
+ }
+ if (mimeType.includes("image")) {
+ return ;
+ }
+ return ;
+}
+
+export function ComposioDriveFolderTree({
+ connectorId,
+ selectedFolders,
+ onSelectFolders,
+ selectedFiles = [],
+ onSelectFiles = () => {},
+}: ComposioDriveFolderTreeProps) {
+ const [itemStates, setItemStates] = useState>(new Map());
+
+ const { data: rootData, isLoading: isLoadingRoot } = useComposioDriveFolders({
+ connectorId,
+ });
+
+ const rootItems = rootData?.items || [];
+
+ const isFolderSelected = (folderId: string): boolean => {
+ return selectedFolders.some((f) => f.id === folderId);
+ };
+
+ const isFileSelected = (fileId: string): boolean => {
+ return selectedFiles.some((f) => f.id === fileId);
+ };
+
+ const toggleFolderSelection = (folderId: string, folderName: string) => {
+ if (isFolderSelected(folderId)) {
+ onSelectFolders(selectedFolders.filter((f) => f.id !== folderId));
+ } else {
+ onSelectFolders([...selectedFolders, { id: folderId, name: folderName }]);
+ }
+ };
+
+ const toggleFileSelection = (fileId: string, fileName: string) => {
+ if (isFileSelected(fileId)) {
+ onSelectFiles(selectedFiles.filter((f) => f.id !== fileId));
+ } else {
+ onSelectFiles([...selectedFiles, { id: fileId, name: fileName }]);
+ }
+ };
+
+ /**
+ * Find an item by ID across all loaded items (root and nested).
+ */
+ const findItem = (itemId: string): DriveItem | undefined => {
+ const state = itemStates.get(itemId);
+ if (state?.item) return state.item;
+
+ const rootItem = rootItems.find((item) => item.id === itemId);
+ if (rootItem) return rootItem;
+
+ for (const [, nodeState] of itemStates) {
+ if (nodeState.children) {
+ const found = nodeState.children.find((child) => child.id === itemId);
+ if (found) return found;
+ }
+ }
+
+ return undefined;
+ };
+
+ /**
+ * Load and display contents of a specific folder.
+ */
+ const loadFolderContents = async (folderId: string) => {
+ try {
+ setItemStates((prev) => {
+ const newMap = new Map(prev);
+ const existing = newMap.get(folderId);
+ if (existing) {
+ newMap.set(folderId, { ...existing, isLoading: true });
+ } else {
+ const item = findItem(folderId);
+ if (item) {
+ newMap.set(folderId, {
+ item,
+ children: null,
+ isExpanded: false,
+ isLoading: true,
+ });
+ }
+ }
+ return newMap;
+ });
+
+ const data = await connectorsApiService.listComposioDriveFolders({
+ connector_id: connectorId,
+ parent_id: folderId,
+ });
+ const items = data.items || [];
+
+ setItemStates((prev) => {
+ const newMap = new Map(prev);
+ const existing = newMap.get(folderId);
+ const item = existing?.item || findItem(folderId);
+
+ if (item) {
+ newMap.set(folderId, {
+ item,
+ children: items,
+ isExpanded: true,
+ isLoading: false,
+ });
+ } else {
+ console.error(`Could not find item for folderId: ${folderId}`);
+ }
+ return newMap;
+ });
+ } catch (error) {
+ console.error("Error loading folder contents:", error);
+ setItemStates((prev) => {
+ const newMap = new Map(prev);
+ const existing = newMap.get(folderId);
+ if (existing) {
+ newMap.set(folderId, { ...existing, isLoading: false });
+ }
+ return newMap;
+ });
+ }
+ };
+
+ /**
+ * Toggle folder expand/collapse state.
+ */
+ const toggleFolder = async (item: DriveItem) => {
+ if (!item.isFolder) return;
+
+ const state = itemStates.get(item.id);
+
+ if (!state || state.children === null) {
+ await loadFolderContents(item.id);
+ } else {
+ setItemStates((prev) => {
+ const newMap = new Map(prev);
+ newMap.set(item.id, {
+ ...state,
+ isExpanded: !state.isExpanded,
+ });
+ return newMap;
+ });
+ }
+ };
+
+ /**
+ * Render a single item (folder or file) with its children.
+ */
+ const renderItem = (item: DriveItem, level: number = 0) => {
+ const state = itemStates.get(item.id);
+ const isExpanded = state?.isExpanded || false;
+ const isLoading = state?.isLoading || false;
+ const children = state?.children;
+ const isFolder = item.isFolder;
+ const isSelected = isFolder ? isFolderSelected(item.id) : isFileSelected(item.id);
+
+ const childFolders = children?.filter((c) => c.isFolder) || [];
+ const childFiles = children?.filter((c) => !c.isFolder) || [];
+
+ const indentSize = 0.75; // Smaller indent for mobile
+
+ return (
+
+
+ {isFolder ? (
+
{
+ e.stopPropagation();
+ toggleFolder(item);
+ }}
+ aria-label={isExpanded ? `Collapse ${item.name}` : `Expand ${item.name}`}
+ >
+ {isLoading ? (
+
+ ) : isExpanded ? (
+
+ ) : (
+
+ )}
+
+ ) : (
+
+ )}
+
+
{
+ if (isFolder) {
+ toggleFolderSelection(item.id, item.name);
+ } else {
+ toggleFileSelection(item.id, item.name);
+ }
+ }}
+ className="shrink-0 h-3.5 w-3.5 sm:h-4 sm:w-4 border-slate-400/20 dark:border-white/20"
+ onClick={(e) => e.stopPropagation()}
+ />
+
+
+ {isFolder ? (
+ isExpanded ? (
+
+ ) : (
+
+ )
+ ) : (
+ getFileIcon(item.mimeType, "h-3 w-3 sm:h-4 sm:w-4")
+ )}
+
+
+ {isFolder ? (
+ toggleFolder(item)}
+ >
+ {item.name}
+
+ ) : (
+
+ {item.name}
+
+ )}
+
+
+ {isExpanded && isFolder && children && (
+
+ {childFolders.map((child) => renderItem(child, level + 1))}
+ {childFiles.map((child) => renderItem(child, level + 1))}
+
+ {children.length === 0 && (
+
+ Empty folder
+
+ )}
+
+ )}
+
+ );
+ };
+
+ return (
+
+
+
+
+
+ toggleFolderSelection("root", "My Drive")}
+ className="shrink-0 h-3.5 w-3.5 sm:h-4 sm:w-4 border-slate-400/20 dark:border-white/20"
+ />
+
+ toggleFolderSelection("root", "My Drive")}
+ >
+ My Drive
+
+
+
+
+ {isLoadingRoot && (
+
+
+
+ )}
+
+
+ {!isLoadingRoot && rootItems.map((item) => renderItem(item, 0))}
+
+
+ {!isLoadingRoot && rootItems.length === 0 && (
+
+ No files or folders found in your Google Drive
+
+ )}
+
+
+
+ );
+}
+
diff --git a/surfsense_web/hooks/use-composio-drive-folders.ts b/surfsense_web/hooks/use-composio-drive-folders.ts
new file mode 100644
index 000000000..af8da1a81
--- /dev/null
+++ b/surfsense_web/hooks/use-composio-drive-folders.ts
@@ -0,0 +1,29 @@
+import { useQuery } from "@tanstack/react-query";
+import { connectorsApiService } from "@/lib/apis/connectors-api.service";
+import { cacheKeys } from "@/lib/query-client/cache-keys";
+
+interface UseComposioDriveFoldersOptions {
+ connectorId: number;
+ parentId?: string;
+ enabled?: boolean;
+}
+
+export function useComposioDriveFolders({
+ connectorId,
+ parentId,
+ enabled = true,
+}: UseComposioDriveFoldersOptions) {
+ return useQuery({
+ queryKey: cacheKeys.connectors.composioDrive.folders(connectorId, parentId),
+ queryFn: async () => {
+ return connectorsApiService.listComposioDriveFolders({
+ connector_id: connectorId,
+ parent_id: parentId,
+ });
+ },
+ enabled: enabled && !!connectorId,
+ staleTime: 5 * 60 * 1000, // 5 minutes
+ retry: 2,
+ });
+}
+
diff --git a/surfsense_web/lib/apis/connectors-api.service.ts b/surfsense_web/lib/apis/connectors-api.service.ts
index 0e4f7f4d5..567db38de 100644
--- a/surfsense_web/lib/apis/connectors-api.service.ts
+++ b/surfsense_web/lib/apis/connectors-api.service.ts
@@ -233,6 +233,29 @@ class ConnectorsApiService {
);
};
+ /**
+ * List Composio Google Drive folders and files
+ */
+ listComposioDriveFolders = async (request: ListGoogleDriveFoldersRequest) => {
+ const parsedRequest = listGoogleDriveFoldersRequest.safeParse(request);
+
+ if (!parsedRequest.success) {
+ console.error("Invalid request:", parsedRequest.error);
+
+ const errorMessage = parsedRequest.error.issues.map((issue) => issue.message).join(", ");
+ throw new ValidationError(`Invalid request: ${errorMessage}`);
+ }
+
+ const { connector_id, parent_id } = parsedRequest.data;
+
+ const queryParams = parent_id ? `?parent_id=${encodeURIComponent(parent_id)}` : "";
+
+ return baseApiService.get(
+ `/api/v1/connectors/${connector_id}/composio-drive/folders${queryParams}`,
+ listGoogleDriveFoldersResponse
+ );
+ };
+
// =============================================================================
// MCP Connector Methods
// =============================================================================
diff --git a/surfsense_web/lib/query-client/cache-keys.ts b/surfsense_web/lib/query-client/cache-keys.ts
index 72f2bbd54..8ffc3b786 100644
--- a/surfsense_web/lib/query-client/cache-keys.ts
+++ b/surfsense_web/lib/query-client/cache-keys.ts
@@ -71,6 +71,10 @@ export const cacheKeys = {
folders: (connectorId: number, parentId?: string) =>
["connectors", "google-drive", connectorId, "folders", parentId] as const,
},
+ composioDrive: {
+ folders: (connectorId: number, parentId?: string) =>
+ ["connectors", "composio-drive", connectorId, "folders", parentId] as const,
+ },
},
comments: {
byMessage: (messageId: number) => ["comments", "message", messageId] as const,
From 42752bbeabea23f03e34821143d769b0ec83afc2 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 05:28:03 +0530
Subject: [PATCH 06/51] feat: improve Composio file processing and error
handling
- Enhanced the handling of file content from Composio, supporting both binary and text files with appropriate processing methods.
- Introduced robust error logging and handling for file content extraction, ensuring better visibility into issues during processing.
- Updated the indexing logic to accommodate new content processing methods, improving overall reliability and user feedback on errors.
- Added temporary file handling for binary files to facilitate text extraction using the ETL service.
---
.../routes/search_source_connectors_routes.py | 6 +-
.../app/services/composio_service.py | 75 ++++-
.../app/tasks/composio_indexer.py | 301 +++++++++++++++++-
3 files changed, 360 insertions(+), 22 deletions(-)
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 89cdd9f95..ed306c7bc 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -1140,7 +1140,7 @@ async def _run_indexing_with_notifications(
f"Indexing completed successfully: {documents_processed} documents processed"
)
- # Update notification on success
+ # Update notification on success (or partial success with errors)
if notification:
# Refresh notification to ensure it's not stale after timestamp update commit
await session.refresh(notification)
@@ -1148,7 +1148,7 @@ async def _run_indexing_with_notifications(
session=session,
notification=notification,
indexed_count=documents_processed,
- error_message=None,
+ error_message=error_or_warning, # Show errors even if some documents were indexed
)
await session.commit() # Commit to ensure Electric SQL syncs the notification update
elif documents_processed > 0:
@@ -1172,7 +1172,7 @@ async def _run_indexing_with_notifications(
session=session,
notification=notification,
indexed_count=documents_processed,
- error_message=None,
+ error_message=error_or_warning, # Show errors even if some documents were indexed
)
await session.commit() # Commit to ensure Electric SQL syncs the notification update
else:
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index 5a6148533..1173cfb6a 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -458,11 +458,76 @@ class ComposioService:
if not result.get("success"):
return None, result.get("error", "Unknown error")
- content = result.get("data")
- if isinstance(content, str):
- content = content.encode("utf-8")
-
- return content, None
+ data = result.get("data")
+
+ # Composio GOOGLEDRIVE_DOWNLOAD_FILE returns a dict with file info
+ # The actual content is in "downloaded_file_content" field
+ if isinstance(data, dict):
+ # Try known Composio response fields in order of preference
+ content = None
+
+ # Primary field from GOOGLEDRIVE_DOWNLOAD_FILE
+ if "downloaded_file_content" in data:
+ content = data["downloaded_file_content"]
+ # downloaded_file_content might itself be a dict with the actual content inside
+ if isinstance(content, dict):
+ # Try to extract actual content from nested dict
+ # Note: Composio nests downloaded_file_content inside another downloaded_file_content
+ actual_content = (
+ content.get("downloaded_file_content") or
+ content.get("content") or
+ content.get("data") or
+ content.get("file_content") or
+ content.get("body") or
+ content.get("text")
+ )
+ if actual_content is not None:
+ content = actual_content
+ else:
+ # Log structure for debugging
+ logger.warning(f"downloaded_file_content is dict with keys: {list(content.keys())}")
+ return None, f"Cannot extract content from downloaded_file_content. Keys: {list(content.keys())}"
+ # Fallback fields for compatibility
+ elif "content" in data:
+ content = data["content"]
+ elif "file_content" in data:
+ content = data["file_content"]
+ elif "data" in data:
+ content = data["data"]
+
+ if content is None:
+ # Log available keys for debugging
+ logger.warning(f"Composio response dict keys: {list(data.keys())}")
+ return None, f"No file content found in Composio response. Available keys: {list(data.keys())}"
+
+ # Convert content to bytes
+ if isinstance(content, str):
+ # Check if it's base64 encoded
+ import base64
+ try:
+ # Try to decode as base64 first
+ content = base64.b64decode(content)
+ except Exception:
+ # If not base64, encode as UTF-8
+ content = content.encode("utf-8")
+ elif isinstance(content, bytes):
+ pass # Already bytes
+ elif isinstance(content, dict):
+ # Still a dict after all extraction attempts - log structure
+ logger.warning(f"Content still dict after extraction: {list(content.keys())}")
+ return None, f"Unexpected nested content structure: {list(content.keys())}"
+ else:
+ return None, f"Unexpected content type in Composio response: {type(content).__name__}"
+
+ return content, None
+ elif isinstance(data, str):
+ return data.encode("utf-8"), None
+ elif isinstance(data, bytes):
+ return data, None
+ elif data is None:
+ return None, "No data returned from Composio"
+ else:
+ return None, f"Unexpected data type from Composio: {type(data).__name__}"
except Exception as e:
logger.error(f"Failed to get Drive file content: {e!s}")
diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py
index f568d4134..6f40e6d66 100644
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@@ -8,7 +8,10 @@ to avoid circular import issues with the connector_indexers package.
"""
import logging
+import os
+import tempfile
from datetime import UTC, datetime
+from pathlib import Path
from typing import Any
from sqlalchemy.exc import SQLAlchemyError
@@ -21,6 +24,7 @@ from app.connectors.composio_connector import ComposioConnector
from app.db import (
Document,
DocumentType,
+ Log,
SearchSourceConnector,
SearchSourceConnectorType,
)
@@ -81,6 +85,237 @@ async def update_connector_last_indexed(
logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
# Extensions that must go through the ETL pipeline (not decodable as text).
BINARY_FILE_EXTENSIONS = {
    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp",
    ".zip", ".tar", ".gz", ".rar", ".7z",
    ".mp3", ".mp4", ".wav", ".avi", ".mov",
    ".exe", ".dll", ".so", ".bin",
}

# Extensions that are safe to decode directly as text.
TEXT_FILE_EXTENSIONS = {
    ".txt", ".md", ".markdown", ".json", ".xml", ".html", ".htm",
    ".css", ".js", ".ts", ".py", ".java", ".c", ".cpp", ".h",
    ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf",
    ".sh", ".bash", ".zsh", ".fish",
    ".sql", ".csv", ".tsv",
    ".rst", ".tex", ".log",
}


def _is_binary_file(file_name: str, mime_type: str) -> bool:
    """Decide whether a file needs binary (ETL) processing.

    The file extension is the primary signal; the MIME type is consulted
    only when the extension is in neither known set. Unknown files default
    to text so they at least get a decode attempt.
    """
    suffix = Path(file_name).suffix.lower()

    # Extension wins when recognized.
    if suffix in BINARY_FILE_EXTENSIONS:
        return True
    if suffix in TEXT_FILE_EXTENSIONS:
        return False

    if mime_type:
        binary_prefixes = ("image/", "audio/", "video/", "application/pdf")
        text_prefixes = ("text/", "application/json", "application/xml")
        if mime_type.startswith(binary_prefixes):
            return True
        if mime_type.startswith(text_prefixes):
            return False
        # Office-style documents (Google Docs/Sheets/Slides, OOXML, ODF).
        if any(marker in mime_type for marker in ("spreadsheet", "document", "presentation")):
            return True

    # Unrecognized: fall back to text handling.
    return False
+
+
async def _process_file_content(
    content: bytes | str,
    file_name: str,
    file_id: str,
    mime_type: str,
    search_space_id: int,
    user_id: str,
    session: AsyncSession,
    task_logger: TaskLoggingService,
    log_entry: Log,
    processing_errors: list[str],
) -> str:
    """
    Process file content and return markdown text.

    Binary files (PDFs, Office docs, images, ...) are written to a temp
    file and run through the configured ETL service; text files are
    decoded directly. Always returns a string — on failure, a placeholder
    markdown document describing the file.

    Args:
        content: File content as bytes or string
        file_name: Name of the file (drives extension/MIME detection)
        file_id: Google Drive file ID (embedded in fallback markdown)
        mime_type: MIME type of the file
        search_space_id: Search space ID (unused here; kept for interface parity)
        user_id: User ID (unused here; kept for interface parity)
        session: Database session (unused here; kept for interface parity)
        task_logger: Task logging service, forwarded to the ETL helper
        log_entry: Log entry for tracking, forwarded to the ETL helper
        processing_errors: Accumulator collecting per-file error messages

    Returns:
        Markdown content string
    """
    # Normalize to bytes for both branches.
    if isinstance(content, str):
        content = content.encode("utf-8")

    if _is_binary_file(file_name, mime_type):
        # Binary path: persist to a temp file so the ETL service can read it.
        temp_file_path = None
        try:
            extension = Path(file_name).suffix or ".bin"

            with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp_file:
                tmp_file.write(content)
                temp_file_path = tmp_file.name

            # Use the configured ETL service to extract text
            extracted_text = await _extract_text_with_etl(
                temp_file_path, file_name, task_logger, log_entry
            )

            if extracted_text:
                return extracted_text

            # Extraction produced nothing usable; emit a stub document.
            logger.warning(f"Could not extract text from binary file {file_name}")
            return f"# {file_name}\n\n[Binary file - text extraction failed]\n\n**File ID:** {file_id}\n**Type:** {mime_type}\n"

        except Exception as e:
            error_msg = f"Error processing binary file {file_name}: {e!s}"
            logger.error(error_msg)
            processing_errors.append(error_msg)
            return f"# {file_name}\n\n[Binary file - processing error]\n\n**File ID:** {file_id}\n**Type:** {mime_type}\n"
        finally:
            # Cleanup temp file
            if temp_file_path and os.path.exists(temp_file_path):
                try:
                    os.unlink(temp_file_path)
                except Exception as e:
                    logger.debug(f"Could not delete temp file {temp_file_path}: {e}")
    else:
        # Text path. Try UTF-8 first, then cp1252 (the common Windows
        # superset encoding), then latin-1 last. latin-1 maps every byte
        # value and can never raise UnicodeDecodeError, so it must come
        # last: the previous ordering tried latin-1 before cp1252, which
        # made cp1252/iso-8859-1 and the "all encodings failed" fallback
        # unreachable dead code.
        try:
            return content.decode("utf-8")
        except UnicodeDecodeError:
            try:
                return content.decode("cp1252")
            except UnicodeDecodeError:
                return content.decode("latin-1")
+
+
async def _extract_text_with_etl(
    file_path: str,
    file_name: str,
    task_logger: TaskLoggingService,
    log_entry: Log,
) -> str | None:
    """
    Extract text from a file using the configured ETL service.

    Dispatches on ``config.ETL_SERVICE`` (UNSTRUCTURED / LLAMACLOUD /
    DOCLING); service-specific dependencies are imported lazily inside
    each branch.

    Args:
        file_path: Path to the file
        file_name: Name of the file (logging; passed to Docling)
        task_logger: Task logging service (used by the LlamaCloud branch)
        log_entry: Log entry for tracking (used by the LlamaCloud branch)

    Returns:
        Extracted text as markdown, or None if the service is unknown or
        extraction fails
    """
    import warnings
    from logging import ERROR, getLogger

    service_name = config.ETL_SERVICE

    try:
        if service_name == "UNSTRUCTURED":
            from langchain_unstructured import UnstructuredLoader

            from app.utils.document_converters import convert_document_to_markdown

            unstructured_loader = UnstructuredLoader(
                file_path,
                mode="elements",
                post_processors=[],
                languages=["eng"],
                include_orig_elements=False,
                include_metadata=False,
                strategy="auto",
            )

            elements = await unstructured_loader.aload()
            if not elements:
                return None
            return await convert_document_to_markdown(elements)

        if service_name == "LLAMACLOUD":
            from app.tasks.document_processors.file_processors import parse_with_llamacloud_retry

            # Rough page estimate from file size (~80 KB per page).
            size_bytes = os.path.getsize(file_path)
            estimated_pages = max(1, size_bytes // (80 * 1024))

            parse_result = await parse_with_llamacloud_retry(
                file_path=file_path,
                estimated_pages=estimated_pages,
                task_logger=task_logger,
                log_entry=log_entry,
            )

            markdown_documents = await parse_result.aget_markdown_documents(split_by_page=False)
            if not markdown_documents:
                return None
            return markdown_documents[0].text

        if service_name == "DOCLING":
            from app.services.docling_service import create_docling_service

            docling_service = create_docling_service()

            # pdfminer is noisy on slightly malformed PDFs; silence it for
            # the duration of the Docling run and restore afterwards.
            pdfminer_logger = getLogger("pdfminer")
            saved_level = pdfminer_logger.level

            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=UserWarning, module="pdfminer")
                warnings.filterwarnings("ignore", message=".*Cannot set gray non-stroke color.*")
                warnings.filterwarnings("ignore", message=".*invalid float value.*")

                pdfminer_logger.setLevel(ERROR)

                try:
                    docling_result = await docling_service.process_document(file_path, file_name)
                finally:
                    pdfminer_logger.setLevel(saved_level)

            return docling_result.get("content")

        logger.warning(f"Unknown ETL service: {service_name}")
        return None

    except Exception as e:
        logger.error(f"ETL extraction failed for {file_name}: {e!s}")
        return None
+
+
# ============ Main indexer function ============
@@ -384,6 +619,7 @@ async def _index_composio_google_drive(
documents_indexed = 0
documents_skipped = 0
+ processing_errors = []
for file_info in all_files:
try:
@@ -422,11 +658,28 @@ async def _index_composio_google_drive(
markdown_content = f"# {file_name}\n\n"
markdown_content += f"**File ID:** {file_id}\n"
markdown_content += f"**Type:** {mime_type}\n"
+ elif isinstance(content, dict):
+ # Safety check: if content is still a dict, log error and use fallback
+ error_msg = f"Unexpected dict content format for file {file_name}: {list(content.keys())}"
+ logger.error(error_msg)
+ processing_errors.append(error_msg)
+ markdown_content = f"# {file_name}\n\n"
+ markdown_content += f"**File ID:** {file_id}\n"
+ markdown_content += f"**Type:** {mime_type}\n"
else:
- try:
- markdown_content = content.decode("utf-8")
- except UnicodeDecodeError:
- markdown_content = f"# {file_name}\n\n[Binary file content]\n"
+ # Process content based on file type
+ markdown_content = await _process_file_content(
+ content=content,
+ file_name=file_name,
+ file_id=file_id,
+ mime_type=mime_type,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ session=session,
+ task_logger=task_logger,
+ log_entry=log_entry,
+ processing_errors=processing_errors,
+ )
content_hash = generate_content_hash(markdown_content, search_space_id)
@@ -531,7 +784,9 @@ async def _index_composio_google_drive(
await session.commit()
except Exception as e:
- logger.error(f"Error processing Drive file: {e!s}", exc_info=True)
+ error_msg = f"Error processing Drive file {file_name or 'unknown'}: {e!s}"
+ logger.error(error_msg, exc_info=True)
+ processing_errors.append(error_msg)
documents_skipped += 1
continue
@@ -549,16 +804,34 @@ async def _index_composio_google_drive(
"Successfully committed all Composio Google Drive document changes to database"
)
- await task_logger.log_task_success(
- log_entry,
- f"Successfully completed Google Drive indexing via Composio for connector {connector_id}",
- {
- "documents_indexed": documents_indexed,
- "documents_skipped": documents_skipped,
- },
- )
+ # If there were processing errors, return them so notification can show them
+ error_message = None
+ if processing_errors:
+ # Combine all errors into a single message
+ if len(processing_errors) == 1:
+ error_message = processing_errors[0]
+ else:
+ error_message = f"Failed to process {len(processing_errors)} file(s). First error: {processing_errors[0]}"
+ await task_logger.log_task_failure(
+ log_entry,
+ f"Completed Google Drive indexing with {len(processing_errors)} error(s) for connector {connector_id}",
+ {
+ "documents_indexed": documents_indexed,
+ "documents_skipped": documents_skipped,
+ "errors": processing_errors,
+ },
+ )
+ else:
+ await task_logger.log_task_success(
+ log_entry,
+ f"Successfully completed Google Drive indexing via Composio for connector {connector_id}",
+ {
+ "documents_indexed": documents_indexed,
+ "documents_skipped": documents_skipped,
+ },
+ )
- return documents_indexed, None
+ return documents_indexed, error_message
except Exception as e:
logger.error(f"Failed to index Google Drive via Composio: {e!s}", exc_info=True)
From 8a0b8346a5ee913a94a922ae8750072ce3b0ec11 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 05:28:18 +0530
Subject: [PATCH 07/51] chore: ran linting
---
.../74_add_composio_connector_enums.py | 4 +-
.../app/connectors/composio_connector.py | 6 +-
.../app/connectors/github_connector.py | 46 +-
.../app/routes/composio_routes.py | 74 ++-
.../routes/search_source_connectors_routes.py | 48 +-
.../app/services/composio_service.py | 178 ++++---
.../app/tasks/composio_indexer.py | 441 ++++++++++++------
.../app/tasks/connector_indexers/__init__.py | 1 +
.../connector_indexers/github_indexer.py | 14 +-
.../components/composio-config.tsx | 8 +-
.../constants/connector-popup.schemas.ts | 4 +-
.../hooks/use-connector-dialog.ts | 28 +-
.../tabs/all-connectors-tab.tsx | 17 +-
.../connectors/composio-drive-folder-tree.tsx | 1 -
.../hooks/use-composio-drive-folders.ts | 1 -
15 files changed, 583 insertions(+), 288 deletions(-)
diff --git a/surfsense_backend/alembic/versions/74_add_composio_connector_enums.py b/surfsense_backend/alembic/versions/74_add_composio_connector_enums.py
index cadf70cb6..2996d9d07 100644
--- a/surfsense_backend/alembic/versions/74_add_composio_connector_enums.py
+++ b/surfsense_backend/alembic/versions/74_add_composio_connector_enums.py
@@ -82,14 +82,14 @@ def upgrade() -> None:
def downgrade() -> None:
"""Downgrade schema - remove Composio connector types from connector and document enums.
-
+
Note: PostgreSQL does not support removing enum values directly.
To properly downgrade, you would need to:
1. Delete any rows using the Composio connector type values
2. Create new enums without the Composio connector types
3. Alter the columns to use the new enums
4. Drop the old enums
-
+
This is left as a no-op since removing enum values is complex
and typically not needed in practice.
"""
diff --git a/surfsense_backend/app/connectors/composio_connector.py b/surfsense_backend/app/connectors/composio_connector.py
index 21e339d12..b49988887 100644
--- a/surfsense_backend/app/connectors/composio_connector.py
+++ b/surfsense_backend/app/connectors/composio_connector.py
@@ -12,7 +12,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.db import SearchSourceConnector
-from app.services.composio_service import ComposioService, INDEXABLE_TOOLKITS
+from app.services.composio_service import INDEXABLE_TOOLKITS, ComposioService
logger = logging.getLogger(__name__)
@@ -271,7 +271,9 @@ class ComposioConnector:
from_email = header_dict.get("from", "Unknown Sender")
to_email = header_dict.get("to", "Unknown Recipient")
# Composio provides messageTimestamp directly
- date_str = message.get("messageTimestamp", "") or header_dict.get("date", "Unknown Date")
+ date_str = message.get("messageTimestamp", "") or header_dict.get(
+ "date", "Unknown Date"
+ )
# Build markdown content
markdown_content = f"# {subject}\n\n"
diff --git a/surfsense_backend/app/connectors/github_connector.py b/surfsense_backend/app/connectors/github_connector.py
index 6f04ccdba..9d4b98c4b 100644
--- a/surfsense_backend/app/connectors/github_connector.py
+++ b/surfsense_backend/app/connectors/github_connector.py
@@ -58,7 +58,9 @@ class GitHubConnector:
if self.token:
logger.info("GitHub connector initialized with authentication token.")
else:
- logger.info("GitHub connector initialized without token (public repos only).")
+ logger.info(
+ "GitHub connector initialized without token (public repos only)."
+ )
def ingest_repository(
self,
@@ -95,17 +97,27 @@ class GitHubConnector:
cmd = [
"gitingest",
repo_url,
- "--output", output_path,
- "--max-size", str(max_file_size),
+ "--output",
+ output_path,
+ "--max-size",
+ str(max_file_size),
# Common exclude patterns
- "-e", "node_modules/*",
- "-e", "vendor/*",
- "-e", ".git/*",
- "-e", "__pycache__/*",
- "-e", "dist/*",
- "-e", "build/*",
- "-e", "*.lock",
- "-e", "package-lock.json",
+ "-e",
+ "node_modules/*",
+ "-e",
+ "vendor/*",
+ "-e",
+ ".git/*",
+ "-e",
+ "__pycache__/*",
+ "-e",
+ "dist/*",
+ "-e",
+ "build/*",
+ "-e",
+ "*.lock",
+ "-e",
+ "package-lock.json",
]
# Add branch if specified
@@ -147,7 +159,9 @@ class GitHubConnector:
os.unlink(output_path)
if not full_content or not full_content.strip():
- logger.warning(f"No content retrieved from repository: {repo_full_name}")
+ logger.warning(
+ f"No content retrieved from repository: {repo_full_name}"
+ )
return None
# Parse the gitingest output
@@ -171,11 +185,11 @@ class GitHubConnector:
logger.error(f"gitingest timed out for repository: {repo_full_name}")
return None
except FileNotFoundError:
- logger.error(
- "gitingest CLI not found. Falling back to Python library."
- )
+ logger.error("gitingest CLI not found. Falling back to Python library.")
# Fall back to Python library
- return self._ingest_with_python_library(repo_full_name, branch, max_file_size)
+ return self._ingest_with_python_library(
+ repo_full_name, branch, max_file_size
+ )
except Exception as e:
logger.error(f"Failed to ingest repository {repo_full_name}: {e}")
return None
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index 25e545dfb..dec9beb02 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -11,7 +11,6 @@ Endpoints:
- GET /connectors/{connector_id}/composio-drive/folders - List folders/files for Composio Google Drive
"""
-import asyncio
import logging
from uuid import UUID
@@ -89,7 +88,9 @@ async def list_composio_toolkits(user: User = Depends(current_active_user)):
@router.get("/auth/composio/connector/add")
async def initiate_composio_auth(
space_id: int,
- toolkit_id: str = Query(..., description="Composio toolkit ID (e.g., 'googledrive', 'gmail')"),
+ toolkit_id: str = Query(
+ ..., description="Composio toolkit ID (e.g., 'googledrive', 'gmail')"
+ ),
user: User = Depends(current_active_user),
):
"""
@@ -239,13 +240,15 @@ async def composio_callback(
# Initialize Composio service
service = ComposioService()
entity_id = f"surfsense_{user_id}"
-
+
# Use camelCase param if provided (Composio's format), fallback to snake_case
final_connected_account_id = connectedAccountId or connected_account_id
-
+
# DEBUG: Log all query parameters received
- logger.info(f"DEBUG: Callback received - connectedAccountId: {connectedAccountId}, connected_account_id: {connected_account_id}, using: {final_connected_account_id}")
-
+ logger.info(
+ f"DEBUG: Callback received - connectedAccountId: {connectedAccountId}, connected_account_id: {connected_account_id}, using: {final_connected_account_id}"
+ )
+
# If we still don't have a connected_account_id, warn but continue
# (the connector will be created but indexing won't work until updated)
if not final_connected_account_id:
@@ -254,7 +257,9 @@ async def composio_callback(
"The connector will be created but indexing may not work."
)
else:
- logger.info(f"Successfully got connected_account_id: {final_connected_account_id}")
+ logger.info(
+ f"Successfully got connected_account_id: {final_connected_account_id}"
+ )
# Build connector config
connector_config = {
@@ -287,10 +292,17 @@ async def composio_callback(
if existing_connector:
# Delete the old Composio connected account before updating
- old_connected_account_id = existing_connector.config.get("composio_connected_account_id")
- if old_connected_account_id and old_connected_account_id != final_connected_account_id:
+ old_connected_account_id = existing_connector.config.get(
+ "composio_connected_account_id"
+ )
+ if (
+ old_connected_account_id
+ and old_connected_account_id != final_connected_account_id
+ ):
try:
- deleted = await service.delete_connected_account(old_connected_account_id)
+ deleted = await service.delete_connected_account(
+ old_connected_account_id
+ )
if deleted:
logger.info(
f"Deleted old Composio connected account {old_connected_account_id} "
@@ -422,7 +434,9 @@ async def list_composio_drive_folders(
)
# Get Composio connected account ID from config
- composio_connected_account_id = connector.config.get("composio_connected_account_id")
+ composio_connected_account_id = connector.config.get(
+ "composio_connected_account_id"
+ )
if not composio_connected_account_id:
raise HTTPException(
status_code=400,
@@ -451,27 +465,37 @@ async def list_composio_drive_folders(
items = []
for file_info in files:
file_id = file_info.get("id", "") or file_info.get("fileId", "")
- file_name = file_info.get("name", "") or file_info.get("fileName", "") or "Untitled"
+ file_name = (
+ file_info.get("name", "") or file_info.get("fileName", "") or "Untitled"
+ )
mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
-
+
if not file_id:
continue
is_folder = mime_type == "application/vnd.google-apps.folder"
-
- items.append({
- "id": file_id,
- "name": file_name,
- "mimeType": mime_type,
- "isFolder": is_folder,
- "parents": file_info.get("parents", []),
- "size": file_info.get("size"),
- "iconLink": file_info.get("iconLink"),
- })
+
+ items.append(
+ {
+ "id": file_id,
+ "name": file_name,
+ "mimeType": mime_type,
+ "isFolder": is_folder,
+ "parents": file_info.get("parents", []),
+ "size": file_info.get("size"),
+ "iconLink": file_info.get("iconLink"),
+ }
+ )
# Sort: folders first, then files, both alphabetically
- folders = sorted([item for item in items if item["isFolder"]], key=lambda x: x["name"].lower())
- files_list = sorted([item for item in items if not item["isFolder"]], key=lambda x: x["name"].lower())
+ folders = sorted(
+ [item for item in items if item["isFolder"]],
+ key=lambda x: x["name"].lower(),
+ )
+ files_list = sorted(
+ [item for item in items if not item["isFolder"]],
+ key=lambda x: x["name"].lower(),
+ )
items = folders + files_list
folder_count = len(folders)
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index ed306c7bc..433acac1c 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -37,7 +37,6 @@ from app.db import (
async_session_maker,
get_async_session,
)
-from app.services.composio_service import ComposioService
from app.schemas import (
GoogleDriveIndexRequest,
MCPConnectorCreate,
@@ -48,6 +47,7 @@ from app.schemas import (
SearchSourceConnectorRead,
SearchSourceConnectorUpdate,
)
+from app.services.composio_service import ComposioService
from app.services.notification_service import NotificationService
from app.tasks.connector_indexers import (
index_airtable_records,
@@ -537,11 +537,15 @@ async def delete_search_source_connector(
SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
]
if db_connector.connector_type in composio_connector_types:
- composio_connected_account_id = db_connector.config.get("composio_connected_account_id")
+ composio_connected_account_id = db_connector.config.get(
+ "composio_connected_account_id"
+ )
if composio_connected_account_id and ComposioService.is_enabled():
try:
service = ComposioService()
- deleted = await service.delete_connected_account(composio_connected_account_id)
+ deleted = await service.delete_connected_account(
+ composio_connected_account_id
+ )
if deleted:
logger.info(
f"Successfully deleted Composio connected account {composio_connected_account_id} "
@@ -897,7 +901,10 @@ async def index_connector_content(
)
response_message = "Web page indexing started in the background."
- elif connector.connector_type == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR:
+ elif (
+ connector.connector_type
+ == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
+ ):
from app.tasks.celery_tasks.connector_tasks import (
index_composio_connector_task,
)
@@ -907,8 +914,12 @@ async def index_connector_content(
if drive_items and drive_items.has_items():
# Update connector config with the selected folders/files
config = connector.config or {}
- config["selected_folders"] = [{"id": f.id, "name": f.name} for f in drive_items.folders]
- config["selected_files"] = [{"id": f.id, "name": f.name} for f in drive_items.files]
+ config["selected_folders"] = [
+ {"id": f.id, "name": f.name} for f in drive_items.folders
+ ]
+ config["selected_files"] = [
+ {"id": f.id, "name": f.name} for f in drive_items.files
+ ]
if drive_items.indexing_options:
config["indexing_options"] = {
"max_files_per_folder": drive_items.indexing_options.max_files_per_folder,
@@ -917,6 +928,7 @@ async def index_connector_content(
}
connector.config = config
from sqlalchemy.orm.attributes import flag_modified
+
flag_modified(connector, "config")
await session.commit()
await session.refresh(connector)
@@ -934,7 +946,9 @@ async def index_connector_content(
index_composio_connector_task.delay(
connector_id, search_space_id, str(user.id), indexing_from, indexing_to
)
- response_message = "Composio Google Drive indexing started in the background."
+ response_message = (
+ "Composio Google Drive indexing started in the background."
+ )
elif connector.connector_type in [
SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
@@ -995,7 +1009,9 @@ async def _update_connector_timestamp_by_id(session: AsyncSession, connector_id:
connector = result.scalars().first()
if connector:
- connector.last_indexed_at = datetime.now(UTC) # Use UTC for timezone consistency
+ connector.last_indexed_at = datetime.now(
+ UTC
+ ) # Use UTC for timezone consistency
await session.commit()
logger.info(f"Updated last_indexed_at for connector {connector_id}")
except Exception as e:
@@ -1150,7 +1166,9 @@ async def _run_indexing_with_notifications(
indexed_count=documents_processed,
error_message=error_or_warning, # Show errors even if some documents were indexed
)
- await session.commit() # Commit to ensure Electric SQL syncs the notification update
+ await (
+ session.commit()
+ ) # Commit to ensure Electric SQL syncs the notification update
elif documents_processed > 0:
# Update notification to storing stage
if notification:
@@ -1174,7 +1192,9 @@ async def _run_indexing_with_notifications(
indexed_count=documents_processed,
error_message=error_or_warning, # Show errors even if some documents were indexed
)
- await session.commit() # Commit to ensure Electric SQL syncs the notification update
+ await (
+ session.commit()
+ ) # Commit to ensure Electric SQL syncs the notification update
else:
# No new documents processed - check if this is an error or just no changes
if error_or_warning:
@@ -1189,7 +1209,9 @@ async def _run_indexing_with_notifications(
indexed_count=0,
error_message=error_or_warning,
)
- await session.commit() # Commit to ensure Electric SQL syncs the notification update
+ await (
+ session.commit()
+ ) # Commit to ensure Electric SQL syncs the notification update
else:
# Success - just no new documents to index (all skipped/unchanged)
logger.info(
@@ -1208,7 +1230,9 @@ async def _run_indexing_with_notifications(
indexed_count=0,
error_message=None, # No error - sync succeeded
)
- await session.commit() # Commit to ensure Electric SQL syncs the notification update
+ await (
+ session.commit()
+ ) # Commit to ensure Electric SQL syncs the notification update
except Exception as e:
logger.error(f"Error in indexing task: {e!s}", exc_info=True)
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index 1173cfb6a..0d6189cd9 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -111,7 +111,7 @@ class ComposioService:
config_toolkit = getattr(auth_config, "toolkit", None)
if config_toolkit is None:
continue
-
+
# Extract toolkit name/slug from the object
toolkit_name = None
if isinstance(config_toolkit, str):
@@ -122,18 +122,22 @@ class ComposioService:
toolkit_name = config_toolkit.name
elif hasattr(config_toolkit, "id"):
toolkit_name = config_toolkit.id
-
+
# Compare case-insensitively
if toolkit_name and toolkit_name.lower() == toolkit_id.lower():
- logger.info(f"Found auth config {auth_config.id} for toolkit {toolkit_id}")
+ logger.info(
+ f"Found auth config {auth_config.id} for toolkit {toolkit_id}"
+ )
return auth_config.id
-
+
# Log available auth configs for debugging
- logger.warning(f"No auth config found for toolkit '{toolkit_id}'. Available auth configs:")
+ logger.warning(
+ f"No auth config found for toolkit '{toolkit_id}'. Available auth configs:"
+ )
for auth_config in auth_configs.items:
config_toolkit = getattr(auth_config, "toolkit", None)
logger.warning(f" - {auth_config.id}: toolkit={config_toolkit}")
-
+
return None
except Exception as e:
logger.error(f"Failed to list auth configs: {e!s}")
@@ -162,7 +166,7 @@ class ComposioService:
try:
# First, get the auth_config_id for this toolkit
auth_config_id = self._get_auth_config_for_toolkit(toolkit_id)
-
+
if not auth_config_id:
raise ValueError(
f"No auth config found for toolkit '{toolkit_id}'. "
@@ -214,7 +218,9 @@ class ComposioService:
"user_id": getattr(account, "user_id", None),
}
except Exception as e:
- logger.error(f"Failed to get connected account {connected_account_id}: {e!s}")
+ logger.error(
+ f"Failed to get connected account {connected_account_id}: {e!s}"
+ )
return None
async def list_all_connections(self) -> list[dict[str, Any]]:
@@ -226,15 +232,17 @@ class ComposioService:
"""
try:
accounts_response = self.client.connected_accounts.list()
-
+
if hasattr(accounts_response, "items"):
accounts = accounts_response.items
elif hasattr(accounts_response, "__iter__"):
accounts = accounts_response
else:
- logger.warning(f"Unexpected accounts response type: {type(accounts_response)}")
+ logger.warning(
+ f"Unexpected accounts response type: {type(accounts_response)}"
+ )
return []
-
+
result = []
for acc in accounts:
toolkit_raw = getattr(acc, "toolkit", None)
@@ -248,14 +256,16 @@ class ComposioService:
toolkit_info = toolkit_raw.name
else:
toolkit_info = str(toolkit_raw)
-
- result.append({
- "id": acc.id,
- "status": getattr(acc, "status", None),
- "toolkit": toolkit_info,
- "user_id": getattr(acc, "user_id", None),
- })
-
+
+ result.append(
+ {
+ "id": acc.id,
+ "status": getattr(acc, "status", None),
+ "toolkit": toolkit_info,
+ "user_id": getattr(acc, "user_id", None),
+ }
+ )
+
return result
except Exception as e:
logger.error(f"Failed to list all connections: {e!s}")
@@ -273,16 +283,18 @@ class ComposioService:
"""
try:
accounts_response = self.client.connected_accounts.list(user_id=user_id)
-
+
# Handle paginated response (may have .items attribute) or direct list
if hasattr(accounts_response, "items"):
accounts = accounts_response.items
elif hasattr(accounts_response, "__iter__"):
accounts = accounts_response
else:
- logger.warning(f"Unexpected accounts response type: {type(accounts_response)}")
+ logger.warning(
+ f"Unexpected accounts response type: {type(accounts_response)}"
+ )
return []
-
+
result = []
for acc in accounts:
# Extract toolkit info - might be string or object
@@ -297,13 +309,15 @@ class ComposioService:
toolkit_info = toolkit_raw.name
else:
toolkit_info = toolkit_raw
-
- result.append({
- "id": acc.id,
- "status": getattr(acc, "status", None),
- "toolkit": toolkit_info,
- })
-
+
+ result.append(
+ {
+ "id": acc.id,
+ "status": getattr(acc, "status", None),
+ "toolkit": toolkit_info,
+ }
+ )
+
logger.info(f"Found {len(result)} connections for user {user_id}: {result}")
return result
except Exception as e:
@@ -324,10 +338,14 @@ class ComposioService:
"""
try:
self.client.connected_accounts.delete(connected_account_id)
- logger.info(f"Successfully deleted Composio connected account: {connected_account_id}")
+ logger.info(
+ f"Successfully deleted Composio connected account: {connected_account_id}"
+ )
return True
except Exception as e:
- logger.error(f"Failed to delete Composio connected account {connected_account_id}: {e!s}")
+ logger.error(
+ f"Failed to delete Composio connected account {connected_account_id}: {e!s}"
+ )
return False
async def execute_tool(
@@ -398,10 +416,14 @@ class ComposioService:
}
if folder_id:
# List contents of a specific folder (exclude shortcuts - we don't have access to them)
- params["q"] = f"'{folder_id}' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
+ params["q"] = (
+ f"'{folder_id}' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
+ )
else:
# List root-level items only (My Drive root), exclude shortcuts
- params["q"] = "'root' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
+ params["q"] = (
+ "'root' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
+ )
if page_token:
params["page_token"] = page_token
@@ -416,17 +438,21 @@ class ComposioService:
return [], None, result.get("error", "Unknown error")
data = result.get("data", {})
-
+
# Handle nested response structure from Composio
files = []
next_token = None
if isinstance(data, dict):
# Try direct access first, then nested
files = data.get("files", []) or data.get("data", {}).get("files", [])
- next_token = data.get("nextPageToken") or data.get("next_page_token") or data.get("data", {}).get("nextPageToken")
+ next_token = (
+ data.get("nextPageToken")
+ or data.get("next_page_token")
+ or data.get("data", {}).get("nextPageToken")
+ )
elif isinstance(data, list):
files = data
-
+
return files, next_token, None
except Exception as e:
@@ -459,13 +485,13 @@ class ComposioService:
return None, result.get("error", "Unknown error")
data = result.get("data")
-
+
# Composio GOOGLEDRIVE_DOWNLOAD_FILE returns a dict with file info
# The actual content is in "downloaded_file_content" field
if isinstance(data, dict):
# Try known Composio response fields in order of preference
content = None
-
+
# Primary field from GOOGLEDRIVE_DOWNLOAD_FILE
if "downloaded_file_content" in data:
content = data["downloaded_file_content"]
@@ -474,19 +500,24 @@ class ComposioService:
# Try to extract actual content from nested dict
# Note: Composio nests downloaded_file_content inside another downloaded_file_content
actual_content = (
- content.get("downloaded_file_content") or
- content.get("content") or
- content.get("data") or
- content.get("file_content") or
- content.get("body") or
- content.get("text")
+ content.get("downloaded_file_content")
+ or content.get("content")
+ or content.get("data")
+ or content.get("file_content")
+ or content.get("body")
+ or content.get("text")
)
if actual_content is not None:
content = actual_content
else:
# Log structure for debugging
- logger.warning(f"downloaded_file_content is dict with keys: {list(content.keys())}")
- return None, f"Cannot extract content from downloaded_file_content. Keys: {list(content.keys())}"
+ logger.warning(
+ f"downloaded_file_content is dict with keys: {list(content.keys())}"
+ )
+ return (
+ None,
+ f"Cannot extract content from downloaded_file_content. Keys: {list(content.keys())}",
+ )
# Fallback fields for compatibility
elif "content" in data:
content = data["content"]
@@ -494,16 +525,20 @@ class ComposioService:
content = data["file_content"]
elif "data" in data:
content = data["data"]
-
+
if content is None:
# Log available keys for debugging
logger.warning(f"Composio response dict keys: {list(data.keys())}")
- return None, f"No file content found in Composio response. Available keys: {list(data.keys())}"
-
+ return (
+ None,
+ f"No file content found in Composio response. Available keys: {list(data.keys())}",
+ )
+
# Convert content to bytes
if isinstance(content, str):
# Check if it's base64 encoded
import base64
+
try:
# Try to decode as base64 first
content = base64.b64decode(content)
@@ -514,11 +549,19 @@ class ComposioService:
pass # Already bytes
elif isinstance(content, dict):
# Still a dict after all extraction attempts - log structure
- logger.warning(f"Content still dict after extraction: {list(content.keys())}")
- return None, f"Unexpected nested content structure: {list(content.keys())}"
+ logger.warning(
+ f"Content still dict after extraction: {list(content.keys())}"
+ )
+ return (
+ None,
+ f"Unexpected nested content structure: {list(content.keys())}",
+ )
else:
- return None, f"Unexpected content type in Composio response: {type(content).__name__}"
-
+ return (
+ None,
+ f"Unexpected content type in Composio response: {type(content).__name__}",
+ )
+
return content, None
elif isinstance(data, str):
return data.encode("utf-8"), None
@@ -527,7 +570,10 @@ class ComposioService:
elif data is None:
return None, "No data returned from Composio"
else:
- return None, f"Unexpected data type from Composio: {type(data).__name__}"
+ return (
+ None,
+ f"Unexpected data type from Composio: {type(data).__name__}",
+ )
except Exception as e:
logger.error(f"Failed to get Drive file content: {e!s}")
@@ -576,17 +622,21 @@ class ComposioService:
return [], None, result.get("error", "Unknown error")
data = result.get("data", {})
-
+
# Try different possible response structures
messages = []
next_token = None
result_size_estimate = None
if isinstance(data, dict):
- messages = data.get("messages", []) or data.get("data", {}).get("messages", []) or data.get("emails", [])
+ messages = (
+ data.get("messages", [])
+ or data.get("data", {}).get("messages", [])
+ or data.get("emails", [])
+ )
# Check for pagination token in various possible locations
next_token = (
- data.get("nextPageToken")
- or data.get("next_page_token")
+ data.get("nextPageToken")
+ or data.get("next_page_token")
or data.get("data", {}).get("nextPageToken")
or data.get("data", {}).get("next_page_token")
)
@@ -599,7 +649,7 @@ class ComposioService:
)
elif isinstance(data, list):
messages = data
-
+
return messages, next_token, result_size_estimate, None
except Exception as e:
@@ -683,14 +733,18 @@ class ComposioService:
return [], result.get("error", "Unknown error")
data = result.get("data", {})
-
+
# Try different possible response structures
events = []
if isinstance(data, dict):
- events = data.get("items", []) or data.get("data", {}).get("items", []) or data.get("events", [])
+ events = (
+ data.get("items", [])
+ or data.get("data", {}).get("items", [])
+ or data.get("events", [])
+ )
elif isinstance(data, list):
events = data
-
+
return events, None
except Exception as e:
diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py
index 6f40e6d66..e5c8b701e 100644
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@@ -64,10 +64,14 @@ async def check_document_by_unique_identifier(
async def get_connector_by_id(
- session: AsyncSession, connector_id: int, connector_type: SearchSourceConnectorType | None
+ session: AsyncSession,
+ connector_id: int,
+ connector_type: SearchSourceConnectorType | None,
) -> SearchSourceConnector | None:
"""Get a connector by ID and optionally by type from the database."""
- query = select(SearchSourceConnector).filter(SearchSourceConnector.id == connector_id)
+ query = select(SearchSourceConnector).filter(
+ SearchSourceConnector.id == connector_id
+ )
if connector_type is not None:
query = query.filter(SearchSourceConnector.connector_type == connector_type)
result = await session.execute(query)
@@ -81,40 +85,90 @@ async def update_connector_last_indexed(
) -> None:
"""Update the last_indexed_at timestamp for a connector."""
if update_last_indexed:
- connector.last_indexed_at = datetime.now(UTC) # Use UTC for timezone consistency
+ connector.last_indexed_at = datetime.now(
+ UTC
+ ) # Use UTC for timezone consistency
logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
# Binary file extensions that need file processor
BINARY_FILE_EXTENSIONS = {
- ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
- ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp",
- ".zip", ".tar", ".gz", ".rar", ".7z",
- ".mp3", ".mp4", ".wav", ".avi", ".mov",
- ".exe", ".dll", ".so", ".bin",
+ ".pdf",
+ ".doc",
+ ".docx",
+ ".xls",
+ ".xlsx",
+ ".ppt",
+ ".pptx",
+ ".png",
+ ".jpg",
+ ".jpeg",
+ ".gif",
+ ".bmp",
+ ".tiff",
+ ".webp",
+ ".zip",
+ ".tar",
+ ".gz",
+ ".rar",
+ ".7z",
+ ".mp3",
+ ".mp4",
+ ".wav",
+ ".avi",
+ ".mov",
+ ".exe",
+ ".dll",
+ ".so",
+ ".bin",
}
# Text file extensions that can be decoded as UTF-8
TEXT_FILE_EXTENSIONS = {
- ".txt", ".md", ".markdown", ".json", ".xml", ".html", ".htm",
- ".css", ".js", ".ts", ".py", ".java", ".c", ".cpp", ".h",
- ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf",
- ".sh", ".bash", ".zsh", ".fish",
- ".sql", ".csv", ".tsv",
- ".rst", ".tex", ".log",
+ ".txt",
+ ".md",
+ ".markdown",
+ ".json",
+ ".xml",
+ ".html",
+ ".htm",
+ ".css",
+ ".js",
+ ".ts",
+ ".py",
+ ".java",
+ ".c",
+ ".cpp",
+ ".h",
+ ".yaml",
+ ".yml",
+ ".toml",
+ ".ini",
+ ".cfg",
+ ".conf",
+ ".sh",
+ ".bash",
+ ".zsh",
+ ".fish",
+ ".sql",
+ ".csv",
+ ".tsv",
+ ".rst",
+ ".tex",
+ ".log",
}
def _is_binary_file(file_name: str, mime_type: str) -> bool:
"""Check if a file is binary based on extension or mime type."""
extension = Path(file_name).suffix.lower()
-
+
# Check extension first
if extension in BINARY_FILE_EXTENSIONS:
return True
if extension in TEXT_FILE_EXTENSIONS:
return False
-
+
# Check mime type
if mime_type:
if mime_type.startswith(("image/", "audio/", "video/", "application/pdf")):
@@ -122,9 +176,13 @@ def _is_binary_file(file_name: str, mime_type: str) -> bool:
if mime_type.startswith(("text/", "application/json", "application/xml")):
return False
# Office documents
- if "spreadsheet" in mime_type or "document" in mime_type or "presentation" in mime_type:
+ if (
+ "spreadsheet" in mime_type
+ or "document" in mime_type
+ or "presentation" in mime_type
+ ):
return True
-
+
# Default to text for unknown types
return False
@@ -143,10 +201,10 @@ async def _process_file_content(
) -> str:
"""
Process file content and return markdown text.
-
+
For binary files (PDFs, images, etc.), uses Surfsense's ETL service.
For text files, decodes as UTF-8.
-
+
Args:
content: File content as bytes or string
file_name: Name of the file
@@ -158,14 +216,14 @@ async def _process_file_content(
task_logger: Task logging service
log_entry: Log entry for tracking
processing_errors: List to append errors to
-
+
Returns:
Markdown content string
"""
# Ensure content is bytes
if isinstance(content, str):
content = content.encode("utf-8")
-
+
# Check if this is a binary file
if _is_binary_file(file_name, mime_type):
# Use ETL service for binary files (PDF, Office docs, etc.)
@@ -173,24 +231,26 @@ async def _process_file_content(
try:
# Get file extension
extension = Path(file_name).suffix or ".bin"
-
+
# Write to temp file
- with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp_file:
+ with tempfile.NamedTemporaryFile(
+ delete=False, suffix=extension
+ ) as tmp_file:
tmp_file.write(content)
temp_file_path = tmp_file.name
-
+
# Use the configured ETL service to extract text
extracted_text = await _extract_text_with_etl(
temp_file_path, file_name, task_logger, log_entry
)
-
+
if extracted_text:
return extracted_text
else:
# Fallback if extraction fails
logger.warning(f"Could not extract text from binary file {file_name}")
return f"# {file_name}\n\n[Binary file - text extraction failed]\n\n**File ID:** {file_id}\n**Type:** {mime_type}\n"
-
+
except Exception as e:
error_msg = f"Error processing binary file {file_name}: {e!s}"
logger.error(error_msg)
@@ -214,7 +274,7 @@ async def _process_file_content(
return content.decode(encoding)
except UnicodeDecodeError:
continue
-
+
# If all encodings fail, treat as binary
error_msg = f"Could not decode text file {file_name} with any encoding"
logger.warning(error_msg)
@@ -230,27 +290,27 @@ async def _extract_text_with_etl(
) -> str | None:
"""
Extract text from a file using the configured ETL service.
-
+
Args:
file_path: Path to the file
file_name: Name of the file
task_logger: Task logging service
log_entry: Log entry for tracking
-
+
Returns:
Extracted text as markdown, or None if extraction fails
"""
import warnings
from logging import ERROR, getLogger
-
+
etl_service = config.ETL_SERVICE
-
+
try:
if etl_service == "UNSTRUCTURED":
from langchain_unstructured import UnstructuredLoader
from app.utils.document_converters import convert_document_to_markdown
-
+
loader = UnstructuredLoader(
file_path,
mode="elements",
@@ -260,57 +320,67 @@ async def _extract_text_with_etl(
include_metadata=False,
strategy="auto",
)
-
+
docs = await loader.aload()
if docs:
return await convert_document_to_markdown(docs)
return None
-
+
elif etl_service == "LLAMACLOUD":
- from app.tasks.document_processors.file_processors import parse_with_llamacloud_retry
-
+ from app.tasks.document_processors.file_processors import (
+ parse_with_llamacloud_retry,
+ )
+
# Estimate pages (rough estimate based on file size)
file_size = os.path.getsize(file_path)
estimated_pages = max(1, file_size // (80 * 1024))
-
+
result = await parse_with_llamacloud_retry(
file_path=file_path,
estimated_pages=estimated_pages,
task_logger=task_logger,
log_entry=log_entry,
)
-
- markdown_documents = await result.aget_markdown_documents(split_by_page=False)
+
+ markdown_documents = await result.aget_markdown_documents(
+ split_by_page=False
+ )
if markdown_documents:
return markdown_documents[0].text
return None
-
+
elif etl_service == "DOCLING":
from app.services.docling_service import create_docling_service
-
+
docling_service = create_docling_service()
-
+
# Suppress pdfminer warnings
pdfminer_logger = getLogger("pdfminer")
original_level = pdfminer_logger.level
-
+
with warnings.catch_warnings():
- warnings.filterwarnings("ignore", category=UserWarning, module="pdfminer")
- warnings.filterwarnings("ignore", message=".*Cannot set gray non-stroke color.*")
+ warnings.filterwarnings(
+ "ignore", category=UserWarning, module="pdfminer"
+ )
+ warnings.filterwarnings(
+ "ignore", message=".*Cannot set gray non-stroke color.*"
+ )
warnings.filterwarnings("ignore", message=".*invalid float value.*")
-
+
pdfminer_logger.setLevel(ERROR)
-
+
try:
- result = await docling_service.process_document(file_path, file_name)
+ result = await docling_service.process_document(
+ file_path, file_name
+ )
finally:
pdfminer_logger.setLevel(original_level)
-
+
return result.get("content")
else:
logger.warning(f"Unknown ETL service: {etl_service}")
return None
-
+
except Exception as e:
logger.error(f"ETL extraction failed for {file_name}: {e!s}")
return None
@@ -367,9 +437,11 @@ async def index_composio_connector(
# Get connector by id - accept any Composio connector type
# We'll check the actual type after loading
connector = await get_connector_by_id(
- session, connector_id, None # Don't filter by type, we'll validate after
+ session,
+ connector_id,
+ None, # Don't filter by type, we'll validate after
)
-
+
# Validate it's a Composio connector
if connector and connector.connector_type not in [
SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
@@ -392,7 +464,9 @@ async def index_composio_connector(
# Get toolkit ID from config
toolkit_id = connector.config.get("toolkit_id")
if not toolkit_id:
- error_msg = f"Composio connector {connector_id} has no toolkit_id configured"
+ error_msg = (
+ f"Composio connector {connector_id} has no toolkit_id configured"
+ )
await task_logger.log_task_failure(
log_entry, error_msg, {"error_type": "MissingToolkitId"}
)
@@ -488,7 +562,7 @@ async def _index_composio_google_drive(
max_items: int = 1000,
) -> tuple[int, str]:
"""Index Google Drive files via Composio.
-
+
Supports folder/file selection via connector config:
- selected_folders: List of {id, name} for folders to index
- selected_files: List of {id, name} for individual files to index
@@ -502,14 +576,18 @@ async def _index_composio_google_drive(
selected_folders = connector_config.get("selected_folders", [])
selected_files = connector_config.get("selected_files", [])
indexing_options = connector_config.get("indexing_options", {})
-
+
max_files_per_folder = indexing_options.get("max_files_per_folder", 100)
include_subfolders = indexing_options.get("include_subfolders", True)
await task_logger.log_task_progress(
log_entry,
f"Fetching Google Drive files via Composio for connector {connector_id}",
- {"stage": "fetching_files", "selected_folders": len(selected_folders), "selected_files": len(selected_files)},
+ {
+ "stage": "fetching_files",
+ "selected_folders": len(selected_folders),
+ "selected_files": len(selected_files),
+ },
)
all_files = []
@@ -520,34 +598,42 @@ async def _index_composio_google_drive(
for folder in selected_folders:
folder_id = folder.get("id")
folder_name = folder.get("name", "Unknown")
-
+
if not folder_id:
continue
-
+
# Handle special case for "root" folder
actual_folder_id = None if folder_id == "root" else folder_id
-
+
logger.info(f"Fetching files from folder: {folder_name} ({folder_id})")
-
+
# Fetch files from this folder
folder_files = []
page_token = None
-
+
while len(folder_files) < max_files_per_folder:
- files, next_token, error = await composio_connector.list_drive_files(
+ (
+ files,
+ next_token,
+ error,
+ ) = await composio_connector.list_drive_files(
folder_id=actual_folder_id,
page_token=page_token,
page_size=min(100, max_files_per_folder - len(folder_files)),
)
if error:
- logger.warning(f"Failed to fetch files from folder {folder_name}: {error}")
+ logger.warning(
+ f"Failed to fetch files from folder {folder_name}: {error}"
+ )
break
# Process files
for file_info in files:
- mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
-
+ mime_type = file_info.get("mimeType", "") or file_info.get(
+ "mime_type", ""
+ )
+
# If it's a folder and include_subfolders is enabled, recursively fetch
if mime_type == "application/vnd.google-apps.folder":
if include_subfolders:
@@ -565,7 +651,7 @@ async def _index_composio_google_drive(
if not next_token:
break
page_token = next_token
-
+
all_files.extend(folder_files[:max_files_per_folder])
logger.info(f"Found {len(folder_files)} files in folder {folder_name}")
@@ -573,16 +659,18 @@ async def _index_composio_google_drive(
for selected_file in selected_files:
file_id = selected_file.get("id")
file_name = selected_file.get("name", "Unknown")
-
+
if not file_id:
continue
-
+
# Add file info (we'll fetch content later during indexing)
- all_files.append({
- "id": file_id,
- "name": file_name,
- "mimeType": "", # Will be determined later
- })
+ all_files.append(
+ {
+ "id": file_id,
+ "name": file_name,
+ "mimeType": "", # Will be determined later
+ }
+ )
else:
# No selection specified - fetch all files (original behavior)
page_token = None
@@ -613,7 +701,10 @@ async def _index_composio_google_drive(
# CRITICAL: Update timestamp even when no files found so Electric SQL syncs and UI shows indexed status
await update_connector_last_indexed(session, connector, update_last_indexed)
await session.commit()
- return 0, None # Return None (not error) when no items found - this is success with 0 items
+ return (
+ 0,
+ None,
+ ) # Return None (not error) when no items found - this is success with 0 items
logger.info(f"Found {len(all_files)} Google Drive files to index via Composio")
@@ -625,8 +716,14 @@ async def _index_composio_google_drive(
try:
# Handle both standard Google API and potential Composio variations
file_id = file_info.get("id", "") or file_info.get("fileId", "")
- file_name = file_info.get("name", "") or file_info.get("fileName", "") or "Untitled"
- mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
+ file_name = (
+ file_info.get("name", "")
+ or file_info.get("fileName", "")
+ or "Untitled"
+ )
+ mime_type = file_info.get("mimeType", "") or file_info.get(
+ "mime_type", ""
+ )
if not file_id:
documents_skipped += 1
@@ -648,12 +745,15 @@ async def _index_composio_google_drive(
)
# Get file content
- content, content_error = await composio_connector.get_drive_file_content(
- file_id
- )
+ (
+ content,
+ content_error,
+ ) = await composio_connector.get_drive_file_content(file_id)
if content_error or not content:
- logger.warning(f"Could not get content for file {file_name}: {content_error}")
+ logger.warning(
+ f"Could not get content for file {file_name}: {content_error}"
+ )
# Use metadata as content fallback
markdown_content = f"# {file_name}\n\n"
markdown_content += f"**File ID:** {file_id}\n"
@@ -700,12 +800,19 @@ async def _index_composio_google_drive(
"mime_type": mime_type,
"document_type": "Google Drive File (Composio)",
}
- summary_content, summary_embedding = await generate_document_summary(
+ (
+ summary_content,
+ summary_embedding,
+ ) = await generate_document_summary(
markdown_content, user_llm, document_metadata
)
else:
- summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
- summary_embedding = config.embedding_model_instance.embed(summary_content)
+ summary_content = (
+ f"Google Drive File: {file_name}\n\nType: {mime_type}"
+ )
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
chunks = await create_document_chunks(markdown_content)
@@ -724,8 +831,8 @@ async def _index_composio_google_drive(
existing_document.updated_at = get_current_timestamp()
documents_indexed += 1
-
- # Batch commit every 10 documents
+
+ # Batch commit every 10 documents
if documents_indexed % 10 == 0:
logger.info(
f"Committing batch: {documents_indexed} Google Drive files processed so far"
@@ -745,12 +852,19 @@ async def _index_composio_google_drive(
"mime_type": mime_type,
"document_type": "Google Drive File (Composio)",
}
- summary_content, summary_embedding = await generate_document_summary(
+ (
+ summary_content,
+ summary_embedding,
+ ) = await generate_document_summary(
markdown_content, user_llm, document_metadata
)
else:
- summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
- summary_embedding = config.embedding_model_instance.embed(summary_content)
+ summary_content = (
+ f"Google Drive File: {file_name}\n\nType: {mime_type}"
+ )
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
chunks = await create_document_chunks(markdown_content)
@@ -776,7 +890,7 @@ async def _index_composio_google_drive(
session.add(document)
documents_indexed += 1
- # Batch commit every 10 documents
+ # Batch commit every 10 documents
if documents_indexed % 10 == 0:
logger.info(
f"Committing batch: {documents_indexed} Google Drive files processed so far"
@@ -784,7 +898,9 @@ async def _index_composio_google_drive(
await session.commit()
except Exception as e:
- error_msg = f"Error processing Drive file {file_name or 'unknown'}: {e!s}"
+ error_msg = (
+ f"Error processing Drive file {file_name or 'unknown'}: {e!s}"
+ )
logger.error(error_msg, exc_info=True)
processing_errors.append(error_msg)
documents_skipped += 1
@@ -848,7 +964,7 @@ async def _fetch_folder_files_recursively(
) -> list[dict[str, Any]]:
"""
Recursively fetch files from a Google Drive folder via Composio.
-
+
Args:
composio_connector: The Composio connector instance
folder_id: Google Drive folder ID
@@ -856,20 +972,20 @@ async def _fetch_folder_files_recursively(
current_count: Current number of files already fetched
depth: Current recursion depth
max_depth: Maximum recursion depth to prevent infinite loops
-
+
Returns:
List of file info dictionaries
"""
if depth >= max_depth:
logger.warning(f"Max recursion depth reached for folder {folder_id}")
return []
-
+
if current_count >= max_files:
return []
-
+
all_files = []
page_token = None
-
+
try:
while len(all_files) + current_count < max_files:
files, next_token, error = await composio_connector.list_drive_files(
@@ -877,14 +993,18 @@ async def _fetch_folder_files_recursively(
page_token=page_token,
page_size=min(100, max_files - len(all_files) - current_count),
)
-
+
if error:
- logger.warning(f"Error fetching files from subfolder {folder_id}: {error}")
+ logger.warning(
+ f"Error fetching files from subfolder {folder_id}: {error}"
+ )
break
-
+
for file_info in files:
- mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
-
+ mime_type = file_info.get("mimeType", "") or file_info.get(
+ "mime_type", ""
+ )
+
if mime_type == "application/vnd.google-apps.folder":
# Recursively fetch from subfolders
subfolder_files = await _fetch_folder_files_recursively(
@@ -898,16 +1018,16 @@ async def _fetch_folder_files_recursively(
all_files.extend(subfolder_files)
else:
all_files.append(file_info)
-
+
if len(all_files) + current_count >= max_files:
break
-
+
if not next_token:
break
page_token = next_token
-
- return all_files[:max_files - current_count]
-
+
+ return all_files[: max_files - current_count]
+
except Exception as e:
logger.error(f"Error in recursive folder fetch: {e!s}")
return all_files
@@ -924,10 +1044,10 @@ async def _process_gmail_message_batch(
) -> tuple[int, int]:
"""
Process a batch of Gmail messages and index them.
-
+
Args:
total_documents_indexed: Running total of documents indexed so far (for batch commits).
-
+
Returns:
Tuple of (documents_indexed, documents_skipped)
"""
@@ -965,7 +1085,9 @@ async def _process_gmail_message_batch(
date_str = value
# Format to markdown using the full message data
- markdown_content = composio_connector.format_gmail_message_to_markdown(message)
+ markdown_content = composio_connector.format_gmail_message_to_markdown(
+ message
+ )
# Check for empty content (defensive parsing per Composio best practices)
if not markdown_content.strip():
@@ -1008,12 +1130,19 @@ async def _process_gmail_message_batch(
"sender": sender,
"document_type": "Gmail Message (Composio)",
}
- summary_content, summary_embedding = await generate_document_summary(
+ (
+ summary_content,
+ summary_embedding,
+ ) = await generate_document_summary(
markdown_content, user_llm, document_metadata
)
else:
- summary_content = f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
- summary_embedding = config.embedding_model_instance.embed(summary_content)
+ summary_content = (
+ f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
+ )
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
chunks = await create_document_chunks(markdown_content)
@@ -1035,8 +1164,8 @@ async def _process_gmail_message_batch(
existing_document.updated_at = get_current_timestamp()
documents_indexed += 1
-
- # Batch commit every 10 documents
+
+ # Batch commit every 10 documents
current_total = total_documents_indexed + documents_indexed
if current_total % 10 == 0:
logger.info(
@@ -1062,8 +1191,12 @@ async def _process_gmail_message_batch(
markdown_content, user_llm, document_metadata
)
else:
- summary_content = f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
- summary_embedding = config.embedding_model_instance.embed(summary_content)
+ summary_content = (
+ f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
+ )
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
chunks = await create_document_chunks(markdown_content)
@@ -1092,7 +1225,7 @@ async def _process_gmail_message_batch(
session.add(document)
documents_indexed += 1
- # Batch commit every 10 documents
+ # Batch commit every 10 documents
current_total = total_documents_indexed + documents_indexed
if current_total % 10 == 0:
logger.info(
@@ -1107,7 +1240,9 @@ async def _process_gmail_message_batch(
try:
await session.rollback()
except Exception as rollback_error:
- logger.error(f"Error during rollback: {rollback_error!s}", exc_info=True)
+ logger.error(
+ f"Error during rollback: {rollback_error!s}", exc_info=True
+ )
continue
return documents_indexed, documents_skipped
@@ -1169,7 +1304,9 @@ async def _index_composio_gmail(
current_batch_size = min(batch_size, remaining)
# Use result_size_estimate if available, otherwise fall back to max_items
- estimated_total = result_size_estimate if result_size_estimate is not None else max_items
+ estimated_total = (
+ result_size_estimate if result_size_estimate is not None else max_items
+ )
# Cap estimated_total at max_items to avoid showing misleading progress
estimated_total = min(estimated_total, max_items)
@@ -1187,7 +1324,12 @@ async def _index_composio_gmail(
)
# Fetch batch of messages
- messages, next_token, result_size_estimate_batch, error = await composio_connector.list_gmail_messages(
+ (
+ messages,
+ next_token,
+ result_size_estimate_batch,
+ error,
+ ) = await composio_connector.list_gmail_messages(
query=query,
max_results=current_batch_size,
page_token=page_token,
@@ -1206,13 +1348,17 @@ async def _index_composio_gmail(
# Update result_size_estimate from first response (Gmail provides this estimate)
if result_size_estimate is None and result_size_estimate_batch is not None:
result_size_estimate = result_size_estimate_batch
- logger.info(f"Gmail API estimated {result_size_estimate} total messages for query: '{query}'")
+ logger.info(
+ f"Gmail API estimated {result_size_estimate} total messages for query: '{query}'"
+ )
total_messages_fetched += len(messages)
# Recalculate estimated_total after potentially updating result_size_estimate
- estimated_total = result_size_estimate if result_size_estimate is not None else max_items
+ estimated_total = (
+ result_size_estimate if result_size_estimate is not None else max_items
+ )
estimated_total = min(estimated_total, max_items)
-
+
logger.info(
f"Fetched batch of {len(messages)} Gmail messages "
f"(total: {total_messages_fetched}/{estimated_total})"
@@ -1357,7 +1503,10 @@ async def _index_composio_google_calendar(
# CRITICAL: Update timestamp even when no events found so Electric SQL syncs and UI shows indexed status
await update_connector_last_indexed(session, connector, update_last_indexed)
await session.commit()
- return 0, None # Return None (not error) when no items found - this is success with 0 items
+ return (
+ 0,
+ None,
+ ) # Return None (not error) when no items found - this is success with 0 items
logger.info(f"Found {len(events)} Google Calendar events to index via Composio")
@@ -1368,14 +1517,18 @@ async def _index_composio_google_calendar(
try:
# Handle both standard Google API and potential Composio variations
event_id = event.get("id", "") or event.get("eventId", "")
- summary = event.get("summary", "") or event.get("title", "") or "No Title"
+ summary = (
+ event.get("summary", "") or event.get("title", "") or "No Title"
+ )
if not event_id:
documents_skipped += 1
continue
# Format to markdown
- markdown_content = composio_connector.format_calendar_event_to_markdown(event)
+ markdown_content = composio_connector.format_calendar_event_to_markdown(
+ event
+ )
# Generate unique identifier
document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"])
@@ -1413,14 +1566,19 @@ async def _index_composio_google_calendar(
"start_time": start_time,
"document_type": "Google Calendar Event (Composio)",
}
- summary_content, summary_embedding = await generate_document_summary(
+ (
+ summary_content,
+ summary_embedding,
+ ) = await generate_document_summary(
markdown_content, user_llm, document_metadata
)
else:
summary_content = f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
if location:
summary_content += f"\nLocation: {location}"
- summary_embedding = config.embedding_model_instance.embed(summary_content)
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
chunks = await create_document_chunks(markdown_content)
@@ -1441,8 +1599,8 @@ async def _index_composio_google_calendar(
existing_document.updated_at = get_current_timestamp()
documents_indexed += 1
-
- # Batch commit every 10 documents
+
+ # Batch commit every 10 documents
if documents_indexed % 10 == 0:
logger.info(
f"Committing batch: {documents_indexed} Google Calendar events processed so far"
@@ -1462,21 +1620,30 @@ async def _index_composio_google_calendar(
"start_time": start_time,
"document_type": "Google Calendar Event (Composio)",
}
- summary_content, summary_embedding = await generate_document_summary(
+ (
+ summary_content,
+ summary_embedding,
+ ) = await generate_document_summary(
markdown_content, user_llm, document_metadata
)
else:
- summary_content = f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
+ summary_content = (
+ f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
+ )
if location:
summary_content += f"\nLocation: {location}"
- summary_embedding = config.embedding_model_instance.embed(summary_content)
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
chunks = await create_document_chunks(markdown_content)
document = Document(
search_space_id=search_space_id,
title=f"Calendar: {summary}",
- document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"]),
+ document_type=DocumentType(
+ TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"]
+ ),
document_metadata={
"event_id": event_id,
"summary": summary,
@@ -1497,7 +1664,7 @@ async def _index_composio_google_calendar(
session.add(document)
documents_indexed += 1
- # Batch commit every 10 documents
+ # Batch commit every 10 documents
if documents_indexed % 10 == 0:
logger.info(
f"Committing batch: {documents_indexed} Google Calendar events processed so far"
@@ -1535,5 +1702,7 @@ async def _index_composio_google_calendar(
return documents_indexed, None
except Exception as e:
- logger.error(f"Failed to index Google Calendar via Composio: {e!s}", exc_info=True)
+ logger.error(
+ f"Failed to index Google Calendar via Composio: {e!s}", exc_info=True
+ )
return 0, f"Failed to index Google Calendar via Composio: {e!s}"
diff --git a/surfsense_backend/app/tasks/connector_indexers/__init__.py b/surfsense_backend/app/tasks/connector_indexers/__init__.py
index 35b5fde4c..8f25e6fdd 100644
--- a/surfsense_backend/app/tasks/connector_indexers/__init__.py
+++ b/surfsense_backend/app/tasks/connector_indexers/__init__.py
@@ -26,6 +26,7 @@ Available indexers:
# Calendar and scheduling
from .airtable_indexer import index_airtable_records
from .bookstack_indexer import index_bookstack_pages
+
# Note: composio_indexer is imported directly in connector_tasks.py to avoid circular imports
from .clickup_indexer import index_clickup_tasks
from .confluence_indexer import index_confluence_pages
diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
index f16ee0156..4a8df4918 100644
--- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
@@ -128,7 +128,9 @@ async def index_github_repos(
if github_pat:
logger.info("Using GitHub PAT for authentication (private repos supported)")
else:
- logger.info("No GitHub PAT provided - only public repositories can be indexed")
+ logger.info(
+ "No GitHub PAT provided - only public repositories can be indexed"
+ )
# 3. Initialize GitHub connector with gitingest backend
await task_logger.log_task_progress(
@@ -308,9 +310,7 @@ async def _process_repository_digest(
if existing_document:
# Document exists - check if content has changed
if existing_document.content_hash == content_hash:
- logger.info(
- f"Repository {repo_full_name} unchanged. Skipping."
- )
+ logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
return 0
else:
logger.info(
@@ -341,7 +341,7 @@ async def _process_repository_digest(
summary_content = (
f"# Repository: {repo_full_name}\n\n"
f"## File Structure\n\n{digest.tree}\n\n"
- f"## File Contents (truncated)\n\n{digest.content[:MAX_DIGEST_CHARS - len(digest.tree) - 200]}..."
+ f"## File Contents (truncated)\n\n{digest.content[: MAX_DIGEST_CHARS - len(digest.tree) - 200]}..."
)
summary_text, summary_embedding = await generate_document_summary(
@@ -362,9 +362,7 @@ async def _process_repository_digest(
# This preserves file-level granularity in search
chunks_data = await create_document_chunks(digest.content)
except Exception as chunk_err:
- logger.error(
- f"Failed to chunk repository {repo_full_name}: {chunk_err}"
- )
+ logger.error(f"Failed to chunk repository {repo_full_name}: {chunk_err}")
# Fall back to a simpler chunking approach
chunks_data = await _simple_chunk_content(digest.content)
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
index 255d0cef4..fdff956e5 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
@@ -211,7 +211,9 @@ export const ComposioConfig: FC = ({ connector, onConfigCha
);
}
if (selectedFiles.length > 0) {
- parts.push(`${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`);
+ parts.push(
+ `${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`
+ );
}
return parts.length > 0 ? `(${parts.join(" ")})` : "";
})()}
@@ -338,7 +340,9 @@ export const ComposioConfig: FC = ({ connector, onConfigCha
handleIndexingOptionChange("include_subfolders", checked)}
+ onCheckedChange={(checked) =>
+ handleIndexingOptionChange("include_subfolders", checked)
+ }
/>
diff --git a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-popup.schemas.ts b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-popup.schemas.ts
index c7e77f666..5a0a8e8c8 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-popup.schemas.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-popup.schemas.ts
@@ -7,7 +7,9 @@ import { searchSourceConnectorTypeEnum } from "@/contracts/types/connector.types
export const connectorPopupQueryParamsSchema = z.object({
modal: z.enum(["connectors"]).optional(),
tab: z.enum(["all", "active"]).optional(),
- view: z.enum(["configure", "edit", "connect", "youtube", "accounts", "mcp-list", "composio"]).optional(),
+ view: z
+ .enum(["configure", "edit", "connect", "youtube", "accounts", "mcp-list", "composio"])
+ .optional(),
connector: z.string().optional(),
connectorId: z.string().optional(),
connectorType: z.string().optional(),
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 1be8a7983..b30337de3 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -26,7 +26,11 @@ import {
import { cacheKeys } from "@/lib/query-client/cache-keys";
import { queryClient } from "@/lib/query-client/client";
import type { IndexingConfigState } from "../constants/connector-constants";
-import { COMPOSIO_CONNECTORS, OAUTH_CONNECTORS, OTHER_CONNECTORS } from "../constants/connector-constants";
+import {
+ COMPOSIO_CONNECTORS,
+ OAUTH_CONNECTORS,
+ OTHER_CONNECTORS,
+} from "../constants/connector-constants";
import {
dateRangeSchema,
frequencyMinutesSchema,
@@ -83,7 +87,6 @@ export const useConnectorDialog = () => {
// MCP list view state (for managing multiple MCP connectors)
const [viewingMCPList, setViewingMCPList] = useState(false);
-
// Track if we came from accounts list when entering edit mode
const [cameFromAccountsList, setCameFromAccountsList] = useState<{
connectorType: string;
@@ -164,16 +167,14 @@ export const useConnectorDialog = () => {
// Handle accounts view
if (params.view === "accounts" && params.connectorType) {
// Update state if not set, or if connectorType has changed
- const needsUpdate = !viewingAccountsType ||
- viewingAccountsType.connectorType !== params.connectorType;
-
+ const needsUpdate =
+ !viewingAccountsType || viewingAccountsType.connectorType !== params.connectorType;
+
if (needsUpdate) {
// Check both OAUTH_CONNECTORS and COMPOSIO_CONNECTORS
- const oauthConnector = OAUTH_CONNECTORS.find(
- (c) => c.connectorType === params.connectorType
- ) || COMPOSIO_CONNECTORS.find(
- (c) => c.connectorType === params.connectorType
- );
+ const oauthConnector =
+ OAUTH_CONNECTORS.find((c) => c.connectorType === params.connectorType) ||
+ COMPOSIO_CONNECTORS.find((c) => c.connectorType === params.connectorType);
if (oauthConnector) {
setViewingAccountsType({
connectorType: oauthConnector.connectorType,
@@ -395,11 +396,8 @@ export const useConnectorDialog = () => {
// Check if authEndpoint already has query parameters
const separator = connector.authEndpoint.includes("?") ? "&" : "?";
const url = `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}${connector.authEndpoint}${separator}space_id=${searchSpaceId}`;
-
- const response = await authenticatedFetch(
- url,
- { method: "GET" }
- );
+
+ const response = await authenticatedFetch(url, { method: "GET" });
if (!response.ok) {
throw new Error(`Failed to initiate ${connector.title} OAuth`);
diff --git a/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx b/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
index ffe879d5d..6b38a37d2 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
@@ -4,7 +4,12 @@ import type { FC } from "react";
import { EnumConnectorName } from "@/contracts/enums/connector";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
import { ConnectorCard } from "../components/connector-card";
-import { CRAWLERS, OAUTH_CONNECTORS, OTHER_CONNECTORS, COMPOSIO_CONNECTORS } from "../constants/connector-constants";
+import {
+ CRAWLERS,
+ OAUTH_CONNECTORS,
+ OTHER_CONNECTORS,
+ COMPOSIO_CONNECTORS,
+} from "../constants/connector-constants";
import { getDocumentCountForConnector } from "../utils/connector-document-mapping";
/**
@@ -28,7 +33,9 @@ interface AllConnectorsTabProps {
allConnectors: SearchSourceConnector[] | undefined;
documentTypeCounts?: Record;
indexingConnectorIds?: Set;
- onConnectOAuth: (connector: (typeof OAUTH_CONNECTORS)[number] | (typeof COMPOSIO_CONNECTORS)[number]) => void;
+ onConnectOAuth: (
+ connector: (typeof OAUTH_CONNECTORS)[number] | (typeof COMPOSIO_CONNECTORS)[number]
+ ) => void;
onConnectNonOAuth?: (connectorType: string) => void;
onCreateWebcrawler?: () => void;
onCreateYouTubeCrawler?: () => void;
@@ -82,7 +89,9 @@ export const AllConnectorsTab: FC = ({
{filteredComposio.length > 0 && (
-
Managed OAuth (Composio)
+
+ Managed OAuth (Composio)
+
{filteredComposio.map((connector) => {
@@ -99,7 +108,6 @@ export const AllConnectorsTab: FC
= ({
const accountCount = typeConnectors.length;
-
const documentCount = getDocumentCountForConnector(
connector.connectorType,
documentTypeCounts
@@ -154,7 +162,6 @@ export const AllConnectorsTab: FC = ({
const accountCount = typeConnectors.length;
-
const documentCount = getDocumentCountForConnector(
connector.connectorType,
documentTypeCounts
diff --git a/surfsense_web/components/connectors/composio-drive-folder-tree.tsx b/surfsense_web/components/connectors/composio-drive-folder-tree.tsx
index 72c36edd5..76ae218cb 100644
--- a/surfsense_web/components/connectors/composio-drive-folder-tree.tsx
+++ b/surfsense_web/components/connectors/composio-drive-folder-tree.tsx
@@ -362,4 +362,3 @@ export function ComposioDriveFolderTree({
);
}
-
diff --git a/surfsense_web/hooks/use-composio-drive-folders.ts b/surfsense_web/hooks/use-composio-drive-folders.ts
index af8da1a81..31e516286 100644
--- a/surfsense_web/hooks/use-composio-drive-folders.ts
+++ b/surfsense_web/hooks/use-composio-drive-folders.ts
@@ -26,4 +26,3 @@ export function useComposioDriveFolders({
retry: 2,
});
}
-
From 6a41b0f6080f9d050a662fa8d0909f282c1c83d4 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 14:12:57 +0530
Subject: [PATCH 08/51] feat: enhance Composio and Google Drive connector
functionality
- Added support for extracting connected account IDs from query parameters in the Composio callback, accommodating both camelCase and snake_case formats.
- Improved logging for received query parameters in the Composio callback to enhance debugging.
- Updated Google Drive folder listing logs to remove unnecessary emoji for consistency.
- Expanded the connector dialog to include Composio Google Drive as a recognized connector type, improving user interface clarity.
---
.../app/routes/composio_routes.py | 21 ++++++++++---------
.../google_drive_add_connector_route.py | 2 +-
.../hooks/use-connector-dialog.ts | 7 +++++--
.../layout/ui/sidebar/InboxSidebar.tsx | 3 +++
4 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index dec9beb02..5af332760 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -14,7 +14,7 @@ Endpoints:
import logging
from uuid import UUID
-from fastapi import APIRouter, Depends, HTTPException, Query
+from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.responses import RedirectResponse
from pydantic import ValidationError
from sqlalchemy.exc import IntegrityError
@@ -170,9 +170,8 @@ async def initiate_composio_auth(
@router.get("/auth/composio/connector/callback")
async def composio_callback(
+ request: Request,
state: str | None = None,
- connectedAccountId: str | None = None, # Composio sends camelCase
- connected_account_id: str | None = None, # Fallback snake_case
error: str | None = None,
session: AsyncSession = Depends(get_async_session),
):
@@ -239,14 +238,16 @@ async def composio_callback(
# Initialize Composio service
service = ComposioService()
- entity_id = f"surfsense_{user_id}"
- # Use camelCase param if provided (Composio's format), fallback to snake_case
- final_connected_account_id = connectedAccountId or connected_account_id
+ # Extract connected_account_id from query params (accepts both camelCase and snake_case)
+ query_params = request.query_params
+ final_connected_account_id = query_params.get(
+ "connectedAccountId"
+ ) or query_params.get("connected_account_id")
- # DEBUG: Log all query parameters received
+ # DEBUG: Log query parameter received
logger.info(
- f"DEBUG: Callback received - connectedAccountId: {connectedAccountId}, connected_account_id: {connected_account_id}, using: {final_connected_account_id}"
+ f"DEBUG: Callback received - connectedAccountId: {query_params.get('connectedAccountId')}, connected_account_id: {query_params.get('connected_account_id')}, using: {final_connected_account_id}"
)
# If we still don't have a connected_account_id, warn but continue
@@ -448,7 +449,7 @@ async def list_composio_drive_folders(
entity_id = f"surfsense_{user.id}"
# Fetch files/folders from Composio Google Drive
- files, next_token, error = await service.get_drive_files(
+ files, _next_token, error = await service.get_drive_files(
connected_account_id=composio_connected_account_id,
entity_id=entity_id,
folder_id=parent_id,
@@ -502,7 +503,7 @@ async def list_composio_drive_folders(
file_count = len(files_list)
logger.info(
- f"✅ Listed {len(items)} total items ({folder_count} folders, {file_count} files) for Composio connector {connector_id}"
+ f"Listed {len(items)} total items ({folder_count} folders, {file_count} files) for Composio connector {connector_id}"
+ (f" in folder {parent_id}" if parent_id else " in ROOT")
)
diff --git a/surfsense_backend/app/routes/google_drive_add_connector_route.py b/surfsense_backend/app/routes/google_drive_add_connector_route.py
index e15aed762..6b4159d29 100644
--- a/surfsense_backend/app/routes/google_drive_add_connector_route.py
+++ b/surfsense_backend/app/routes/google_drive_add_connector_route.py
@@ -402,7 +402,7 @@ async def list_google_drive_folders(
file_count = len(items) - folder_count
logger.info(
- f"✅ Listed {len(items)} total items ({folder_count} folders, {file_count} files) for connector {connector_id}"
+ f"Listed {len(items)} total items ({folder_count} folders, {file_count} files) for connector {connector_id}"
+ (f" in folder {parent_id}" if parent_id else " in ROOT")
)
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index b30337de3..2923ab823 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -1182,8 +1182,11 @@ export const useConnectorDialog = () => {
if (!editingConnector.is_indexable) {
// Non-indexable connectors (like Tavily API) don't need re-indexing
indexingDescription = "Settings saved.";
- } else if (editingConnector.connector_type === "GOOGLE_DRIVE_CONNECTOR") {
- // Google Drive uses folder selection from config, not date ranges
+ } else if (
+ editingConnector.connector_type === "GOOGLE_DRIVE_CONNECTOR" ||
+ editingConnector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
+ ) {
+ // Google Drive (both regular and Composio) uses folder selection from config, not date ranges
const selectedFolders = (connectorConfig || editingConnector.config)?.selected_folders as
| Array<{ id: string; name: string }>
| undefined;
diff --git a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
index a3fd3ea14..4dee8888a 100644
--- a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
@@ -79,6 +79,9 @@ function getConnectorTypeDisplayName(connectorType: string): string {
GOOGLE_CALENDAR_CONNECTOR: "Google Calendar",
GOOGLE_GMAIL_CONNECTOR: "Gmail",
GOOGLE_DRIVE_CONNECTOR: "Google Drive",
+ COMPOSIO_GOOGLE_DRIVE_CONNECTOR: "Composio Google Drive",
+ COMPOSIO_GMAIL_CONNECTOR: "Composio Gmail",
+ COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: "Composio Google Calendar",
LINEAR_CONNECTOR: "Linear",
NOTION_CONNECTOR: "Notion",
SLACK_CONNECTOR: "Slack",
From 9c5c925fcaf60e2ad7c8a76a6bd66e7e5a227904 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 14:50:06 +0530
Subject: [PATCH 09/51] feat: update Obsidian connector UI and icon
---
.../components/obsidian-connect-form.tsx | 4 +-
surfsense_web/lib/connectors/utils.ts | 1 +
surfsense_web/public/connectors/obsidian.svg | 56 +++++++++++++++----
3 files changed, 48 insertions(+), 13 deletions(-)
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx
index 064e10e2f..94839b03b 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx
@@ -1,7 +1,7 @@
"use client";
import { zodResolver } from "@hookform/resolvers/zod";
-import { FolderOpen, Info } from "lucide-react";
+import { Info } from "lucide-react";
import type { FC } from "react";
import { useRef, useState } from "react";
import { useForm } from "react-hook-form";
@@ -109,7 +109,7 @@ export const ObsidianConnectForm: FC = ({ onSubmit, isSubmitti
return (
-
+
Self-Hosted Only
diff --git a/surfsense_web/lib/connectors/utils.ts b/surfsense_web/lib/connectors/utils.ts
index a85b912ed..34721a6aa 100644
--- a/surfsense_web/lib/connectors/utils.ts
+++ b/surfsense_web/lib/connectors/utils.ts
@@ -21,6 +21,7 @@ export const getConnectorTypeDisplay = (type: string): string => {
ELASTICSEARCH_CONNECTOR: "Elasticsearch",
WEBCRAWLER_CONNECTOR: "Web Pages",
CIRCLEBACK_CONNECTOR: "Circleback",
+ OBSIDIAN_CONNECTOR: "Obsidian",
};
return typeMap[type] || type;
};
diff --git a/surfsense_web/public/connectors/obsidian.svg b/surfsense_web/public/connectors/obsidian.svg
index 9fe15c4a3..b5afd5724 100644
--- a/surfsense_web/public/connectors/obsidian.svg
+++ b/surfsense_web/public/connectors/obsidian.svg
@@ -1,12 +1,46 @@
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From 29382070aaa2583a18a0b5dcb0c078d13ebb62f2 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 18:37:09 +0530
Subject: [PATCH 10/51] feat: enhance Composio connector functionality with
Google Drive delta sync support
- Added methods to retrieve the starting page token and list changes in Google Drive, enabling delta sync capabilities.
- Updated Composio service to handle file download directory configuration.
- Modified indexing tasks to support delta sync, improving efficiency by processing only changed files.
- Adjusted date handling in connector tasks to allow optional start and end dates.
- Improved error handling and logging throughout the Composio indexing process.
---
.../app/connectors/composio_connector.py | 49 +
.../routes/search_source_connectors_routes.py | 4 +-
.../app/services/composio_service.py | 333 +++++--
.../app/tasks/celery_tasks/connector_tasks.py | 8 +-
.../celery_tasks/schedule_checker_task.py | 5 +
.../app/tasks/composio_indexer.py | 909 +++++++++++-------
.../(manage)/components/RowActions.tsx | 2 +-
.../assistant-ui/connector-popup.tsx | 13 +-
.../views/connector-edit-view.tsx | 2 +-
.../views/indexing-configuration-view.tsx | 11 +-
.../hooks/use-connector-dialog.ts | 5 +-
.../tabs/active-connectors-tab.tsx | 7 +-
.../components/settings/llm-role-manager.tsx | 2 +-
.../settings/model-config-manager.tsx | 18 +-
.../settings/prompt-config-manager.tsx | 2 +-
surfsense_web/messages/en.json | 6 +-
16 files changed, 905 insertions(+), 471 deletions(-)
diff --git a/surfsense_backend/app/connectors/composio_connector.py b/surfsense_backend/app/connectors/composio_connector.py
index b49988887..8cb91355d 100644
--- a/surfsense_backend/app/connectors/composio_connector.py
+++ b/surfsense_backend/app/connectors/composio_connector.py
@@ -146,6 +146,55 @@ class ComposioConnector:
file_id=file_id,
)
+ async def get_drive_start_page_token(self) -> tuple[str | None, str | None]:
+ """
+ Get the starting page token for Google Drive change tracking.
+
+ Returns:
+ Tuple of (start_page_token, error message).
+ """
+ connected_account_id = await self.get_connected_account_id()
+ if not connected_account_id:
+ return None, "No connected account ID found"
+
+ entity_id = await self.get_entity_id()
+ service = await self._get_service()
+ return await service.get_drive_start_page_token(
+ connected_account_id=connected_account_id,
+ entity_id=entity_id,
+ )
+
+ async def list_drive_changes(
+ self,
+ page_token: str | None = None,
+ page_size: int = 100,
+ include_removed: bool = True,
+ ) -> tuple[list[dict[str, Any]], str | None, str | None]:
+ """
+ List changes in Google Drive since the given page token.
+
+ Args:
+ page_token: Page token from previous sync (optional).
+ page_size: Number of changes per page.
+ include_removed: Whether to include removed items.
+
+ Returns:
+ Tuple of (changes list, new_start_page_token, error message).
+ """
+ connected_account_id = await self.get_connected_account_id()
+ if not connected_account_id:
+ return [], None, "No connected account ID found"
+
+ entity_id = await self.get_entity_id()
+ service = await self._get_service()
+ return await service.list_drive_changes(
+ connected_account_id=connected_account_id,
+ entity_id=entity_id,
+ page_token=page_token,
+ page_size=page_size,
+ include_removed=include_removed,
+ )
+
# ===== Gmail Methods =====
async def list_gmail_messages(
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index db1b884e0..82f452c61 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -2288,8 +2288,8 @@ async def run_composio_indexing(
connector_id: int,
search_space_id: int,
user_id: str,
- start_date: str,
- end_date: str,
+ start_date: str | None,
+ end_date: str | None,
):
"""
Run Composio connector indexing with real-time notifications.
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index 0d6189cd9..3810f03a4 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -57,17 +57,30 @@ TOOLKIT_TO_DOCUMENT_TYPE = {
class ComposioService:
"""Service for interacting with Composio API."""
- def __init__(self, api_key: str | None = None):
+ # Default download directory for files from Composio
+ DEFAULT_DOWNLOAD_DIR = "/tmp/composio_downloads"
+
+ def __init__(self, api_key: str | None = None, file_download_dir: str | None = None):
"""
Initialize the Composio service.
Args:
api_key: Composio API key. If not provided, uses config.COMPOSIO_API_KEY.
+ file_download_dir: Directory for downloaded files. Defaults to /tmp/composio_downloads.
"""
+ import os
+
self.api_key = api_key or config.COMPOSIO_API_KEY
if not self.api_key:
raise ValueError("COMPOSIO_API_KEY is required but not configured")
- self.client = Composio(api_key=self.api_key)
+
+ # Set up download directory
+ self.file_download_dir = file_download_dir or self.DEFAULT_DOWNLOAD_DIR
+ os.makedirs(self.file_download_dir, exist_ok=True)
+
+ # Initialize Composio client with download directory
+ # Per docs: file_download_dir configures where files are downloaded
+ self.client = Composio(api_key=self.api_key, file_download_dir=self.file_download_dir)
@staticmethod
def is_enabled() -> bool:
@@ -465,6 +478,10 @@ class ComposioService:
"""
Download file content from Google Drive via Composio.
+ Per Composio docs: When tools return files, they are automatically downloaded
+ to a local directory, and the local file path is provided in the response.
+ Response includes: file_path, file_name, size fields.
+
Args:
connected_account_id: Composio connected account ID.
entity_id: The entity/user ID that owns the connected account.
@@ -473,11 +490,13 @@ class ComposioService:
Returns:
Tuple of (file content bytes, error message).
"""
+ from pathlib import Path
+
try:
result = await self.execute_tool(
connected_account_id=connected_account_id,
tool_name="GOOGLEDRIVE_DOWNLOAD_FILE",
- params={"file_id": file_id}, # snake_case
+ params={"file_id": file_id},
entity_id=entity_id,
)
@@ -485,100 +504,234 @@ class ComposioService:
return None, result.get("error", "Unknown error")
data = result.get("data")
-
- # Composio GOOGLEDRIVE_DOWNLOAD_FILE returns a dict with file info
- # The actual content is in "downloaded_file_content" field
- if isinstance(data, dict):
- # Try known Composio response fields in order of preference
- content = None
-
- # Primary field from GOOGLEDRIVE_DOWNLOAD_FILE
- if "downloaded_file_content" in data:
- content = data["downloaded_file_content"]
- # downloaded_file_content might itself be a dict with the actual content inside
- if isinstance(content, dict):
- # Try to extract actual content from nested dict
- # Note: Composio nests downloaded_file_content inside another downloaded_file_content
- actual_content = (
- content.get("downloaded_file_content")
- or content.get("content")
- or content.get("data")
- or content.get("file_content")
- or content.get("body")
- or content.get("text")
- )
- if actual_content is not None:
- content = actual_content
- else:
- # Log structure for debugging
- logger.warning(
- f"downloaded_file_content is dict with keys: {list(content.keys())}"
- )
- return (
- None,
- f"Cannot extract content from downloaded_file_content. Keys: {list(content.keys())}",
- )
- # Fallback fields for compatibility
- elif "content" in data:
- content = data["content"]
- elif "file_content" in data:
- content = data["file_content"]
- elif "data" in data:
- content = data["data"]
-
- if content is None:
- # Log available keys for debugging
- logger.warning(f"Composio response dict keys: {list(data.keys())}")
- return (
- None,
- f"No file content found in Composio response. Available keys: {list(data.keys())}",
- )
-
- # Convert content to bytes
- if isinstance(content, str):
- # Check if it's base64 encoded
- import base64
-
- try:
- # Try to decode as base64 first
- content = base64.b64decode(content)
- except Exception:
- # If not base64, encode as UTF-8
- content = content.encode("utf-8")
- elif isinstance(content, bytes):
- pass # Already bytes
- elif isinstance(content, dict):
- # Still a dict after all extraction attempts - log structure
- logger.warning(
- f"Content still dict after extraction: {list(content.keys())}"
- )
- return (
- None,
- f"Unexpected nested content structure: {list(content.keys())}",
- )
- else:
- return (
- None,
- f"Unexpected content type in Composio response: {type(content).__name__}",
- )
-
- return content, None
- elif isinstance(data, str):
- return data.encode("utf-8"), None
- elif isinstance(data, bytes):
- return data, None
- elif data is None:
+ if not data:
return None, "No data returned from Composio"
- else:
- return (
- None,
- f"Unexpected data type from Composio: {type(data).__name__}",
+
+ # Per Composio docs, response includes file_path where file was downloaded
+ # Response structure: {data: {...}, error: ..., successful: ...}
+ # The actual file info is nested inside data["data"]
+ file_path = None
+
+ if isinstance(data, dict):
+ # Handle nested response structure: data contains {data, error, successful}
+ # The actual file info is in data["data"]
+ inner_data = data
+ if "data" in data and isinstance(data["data"], dict):
+ inner_data = data["data"]
+ logger.debug(f"Found nested data structure. Inner keys: {list(inner_data.keys())}")
+ elif "successful" in data and "data" in data:
+ # Standard Composio response wrapper
+ inner_data = data["data"] if data["data"] else data
+
+ # Try documented fields: file_path, downloaded_file_content, path, uri
+ file_path = (
+ inner_data.get("file_path") or
+ inner_data.get("downloaded_file_content") or
+ inner_data.get("path") or
+ inner_data.get("uri")
)
+
+ # Handle nested dict case where downloaded_file_content contains the path
+ if isinstance(file_path, dict):
+ file_path = (
+ file_path.get("file_path") or
+ file_path.get("downloaded_file_content") or
+ file_path.get("path") or
+ file_path.get("uri")
+ )
+
+ # If still no path, check if inner_data itself has the nested structure
+ if not file_path and isinstance(inner_data, dict):
+ for key in ["downloaded_file_content", "file_path", "path", "uri"]:
+ if key in inner_data:
+ val = inner_data[key]
+ if isinstance(val, str):
+ file_path = val
+ break
+ elif isinstance(val, dict):
+ # One more level of nesting
+ file_path = (
+ val.get("file_path") or
+ val.get("downloaded_file_content") or
+ val.get("path") or
+ val.get("uri")
+ )
+ if file_path:
+ break
+
+ logger.debug(f"Composio response keys: {list(data.keys())}, inner keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}, extracted path: {file_path}")
+ elif isinstance(data, str):
+ # Direct string response (could be path or content)
+ file_path = data
+ elif isinstance(data, bytes):
+ # Direct bytes response
+ return data, None
+
+ # Read file from the path
+ if file_path and isinstance(file_path, str):
+ path_obj = Path(file_path)
+
+ # Check if it's a valid file path (absolute or in .composio directory)
+ if path_obj.is_absolute() or '.composio' in str(path_obj):
+ try:
+ if path_obj.exists():
+ content = path_obj.read_bytes()
+ logger.info(f"Successfully read {len(content)} bytes from Composio file: {file_path}")
+ return content, None
+ else:
+ logger.warning(f"File path from Composio does not exist: {file_path}")
+ return None, f"File not found at path: {file_path}"
+ except Exception as e:
+ logger.error(f"Failed to read file from Composio path {file_path}: {e!s}")
+ return None, f"Failed to read file: {e!s}"
+ else:
+ # Not a file path - might be base64 encoded content
+ try:
+ import base64
+                        content = base64.b64decode(file_path, validate=True)
+ return content, None
+ except Exception:
+ # Not base64, return as UTF-8 bytes
+ return file_path.encode("utf-8"), None
+
+ # If we got here, couldn't extract file path
+ if isinstance(data, dict):
+ # Log full structure for debugging
+ inner_data = data.get("data", {})
+ logger.warning(
+ f"Could not extract file path from Composio response. "
+ f"Top keys: {list(data.keys())}, "
+ f"Inner data keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else type(inner_data).__name__}, "
+ f"Full inner data: {inner_data}"
+ )
+ return None, f"No file path in Composio response. Keys: {list(data.keys())}, inner: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}"
+
+ return None, f"Unexpected data type from Composio: {type(data).__name__}"
except Exception as e:
logger.error(f"Failed to get Drive file content: {e!s}")
return None, str(e)
+ async def get_drive_start_page_token(
+ self, connected_account_id: str, entity_id: str
+ ) -> tuple[str | None, str | None]:
+ """
+ Get the starting page token for Google Drive change tracking.
+
+ This token represents the current state and is used for future delta syncs.
+ Per Composio docs: Use GOOGLEDRIVE_GET_CHANGES_START_PAGE_TOKEN to get initial token.
+
+ Args:
+ connected_account_id: Composio connected account ID.
+ entity_id: The entity/user ID that owns the connected account.
+
+ Returns:
+ Tuple of (start_page_token, error message).
+ """
+ try:
+ result = await self.execute_tool(
+ connected_account_id=connected_account_id,
+ tool_name="GOOGLEDRIVE_GET_CHANGES_START_PAGE_TOKEN",
+ params={},
+ entity_id=entity_id,
+ )
+
+ if not result.get("success"):
+ return None, result.get("error", "Unknown error")
+
+ data = result.get("data", {})
+ # Handle nested response: {data: {startPageToken: ...}, successful: ...}
+ if isinstance(data, dict):
+                inner_data = data.get("data") or data
+ token = (
+ inner_data.get("startPageToken") or
+ inner_data.get("start_page_token") or
+ data.get("startPageToken") or
+ data.get("start_page_token")
+ )
+ if token:
+ logger.info(f"Got Drive start page token: {token}")
+ return token, None
+
+ logger.warning(f"Could not extract start page token from response: {data}")
+ return None, "No start page token in response"
+
+ except Exception as e:
+ logger.error(f"Failed to get Drive start page token: {e!s}")
+ return None, str(e)
+
+ async def list_drive_changes(
+ self,
+ connected_account_id: str,
+ entity_id: str,
+ page_token: str | None = None,
+ page_size: int = 100,
+ include_removed: bool = True,
+ ) -> tuple[list[dict[str, Any]], str | None, str | None]:
+ """
+ List changes in Google Drive since the given page token.
+
+ Per Composio docs: GOOGLEDRIVE_LIST_CHANGES tracks modifications to files/folders.
+ If pageToken is not provided, it auto-fetches the current start page token.
+ Response includes nextPageToken for pagination and newStartPageToken for future syncs.
+
+ Args:
+ connected_account_id: Composio connected account ID.
+ entity_id: The entity/user ID that owns the connected account.
+ page_token: Page token from previous sync (optional - will auto-fetch if not provided).
+ page_size: Number of changes per page.
+ include_removed: Whether to include removed items in the response.
+
+ Returns:
+ Tuple of (changes list, new_start_page_token, error message).
+ """
+ try:
+ params = {
+ "pageSize": min(page_size, 100),
+ "includeRemoved": include_removed,
+ }
+ if page_token:
+ params["pageToken"] = page_token
+
+ result = await self.execute_tool(
+ connected_account_id=connected_account_id,
+ tool_name="GOOGLEDRIVE_LIST_CHANGES",
+ params=params,
+ entity_id=entity_id,
+ )
+
+ if not result.get("success"):
+ return [], None, result.get("error", "Unknown error")
+
+ data = result.get("data", {})
+
+ # Handle nested response structure
+ changes = []
+ new_start_token = None
+
+ if isinstance(data, dict):
+                inner_data = data.get("data") or data
+ changes = inner_data.get("changes", []) or data.get("changes", [])
+
+ # Get the token for next sync
+ # newStartPageToken is returned when all changes have been fetched
+ # nextPageToken is for pagination within the current fetch
+ new_start_token = (
+ inner_data.get("newStartPageToken") or
+ inner_data.get("new_start_page_token") or
+ inner_data.get("nextPageToken") or
+ inner_data.get("next_page_token") or
+ data.get("newStartPageToken") or
+ data.get("nextPageToken")
+ )
+
+            logger.info(f"Got {len(changes)} Drive changes, new token: {new_start_token[:20] + '...' if new_start_token else 'None'}")
+ return changes, new_start_token, None
+
+ except Exception as e:
+ logger.error(f"Failed to list Drive changes: {e!s}")
+ return [], None, str(e)
+
# ===== Gmail specific methods =====
async def get_gmail_messages(
diff --git a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
index 81cafaa2c..d0710d246 100644
--- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
@@ -810,8 +810,8 @@ def index_composio_connector_task(
connector_id: int,
search_space_id: int,
user_id: str,
- start_date: str,
- end_date: str,
+ start_date: str | None,
+ end_date: str | None,
):
"""Celery task to index Composio connector content (Google Drive, Gmail, Calendar via Composio)."""
import asyncio
@@ -833,8 +833,8 @@ async def _index_composio_connector(
connector_id: int,
search_space_id: int,
user_id: str,
- start_date: str,
- end_date: str,
+ start_date: str | None,
+ end_date: str | None,
):
"""Index Composio connector content with new session and real-time notifications."""
# Import from routes to use the notification-wrapped version
diff --git a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
index 21855f73f..bf80cbe78 100644
--- a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
+++ b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
@@ -66,6 +66,7 @@ async def _check_and_trigger_schedules():
from app.tasks.celery_tasks.connector_tasks import (
index_airtable_records_task,
index_clickup_tasks_task,
+ index_composio_connector_task,
index_confluence_pages_task,
index_crawled_urls_task,
index_discord_messages_task,
@@ -98,6 +99,10 @@ async def _check_and_trigger_schedules():
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR: index_google_drive_files_task,
+ # Composio connector types
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: index_composio_connector_task,
+ SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR: index_composio_connector_task,
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: index_composio_connector_task,
}
# Trigger indexing for each due connector
diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py
index e5c8b701e..3eed8470e 100644
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@@ -561,8 +561,12 @@ async def _index_composio_google_drive(
update_last_indexed: bool = True,
max_items: int = 1000,
) -> tuple[int, str]:
- """Index Google Drive files via Composio.
+ """Index Google Drive files via Composio with delta sync support.
+ Delta Sync Flow:
+ 1. First sync: Full scan + get initial page token
+ 2. Subsequent syncs: Use LIST_CHANGES to process only changed files
+
Supports folder/file selection via connector config:
- selected_folders: List of {id, name} for folders to index
- selected_files: List of {id, name} for individual files to index
@@ -576,354 +580,88 @@ async def _index_composio_google_drive(
selected_folders = connector_config.get("selected_folders", [])
selected_files = connector_config.get("selected_files", [])
indexing_options = connector_config.get("indexing_options", {})
+
+ # Check for stored page token for delta sync
+ stored_page_token = connector_config.get("drive_page_token")
+ use_delta_sync = stored_page_token and connector.last_indexed_at
max_files_per_folder = indexing_options.get("max_files_per_folder", 100)
include_subfolders = indexing_options.get("include_subfolders", True)
- await task_logger.log_task_progress(
- log_entry,
- f"Fetching Google Drive files via Composio for connector {connector_id}",
- {
- "stage": "fetching_files",
- "selected_folders": len(selected_folders),
- "selected_files": len(selected_files),
- },
- )
-
- all_files = []
-
- # If specific folders/files are selected, fetch from those
- if selected_folders or selected_files:
- # Fetch files from selected folders
- for folder in selected_folders:
- folder_id = folder.get("id")
- folder_name = folder.get("name", "Unknown")
-
- if not folder_id:
- continue
-
- # Handle special case for "root" folder
- actual_folder_id = None if folder_id == "root" else folder_id
-
- logger.info(f"Fetching files from folder: {folder_name} ({folder_id})")
-
- # Fetch files from this folder
- folder_files = []
- page_token = None
-
- while len(folder_files) < max_files_per_folder:
- (
- files,
- next_token,
- error,
- ) = await composio_connector.list_drive_files(
- folder_id=actual_folder_id,
- page_token=page_token,
- page_size=min(100, max_files_per_folder - len(folder_files)),
- )
-
- if error:
- logger.warning(
- f"Failed to fetch files from folder {folder_name}: {error}"
- )
- break
-
- # Process files
- for file_info in files:
- mime_type = file_info.get("mimeType", "") or file_info.get(
- "mime_type", ""
- )
-
- # If it's a folder and include_subfolders is enabled, recursively fetch
- if mime_type == "application/vnd.google-apps.folder":
- if include_subfolders:
- # Add subfolder files recursively
- subfolder_files = await _fetch_folder_files_recursively(
- composio_connector,
- file_info.get("id"),
- max_files=max_files_per_folder,
- current_count=len(folder_files),
- )
- folder_files.extend(subfolder_files)
- else:
- folder_files.append(file_info)
-
- if not next_token:
- break
- page_token = next_token
-
- all_files.extend(folder_files[:max_files_per_folder])
- logger.info(f"Found {len(folder_files)} files in folder {folder_name}")
-
- # Add specifically selected files
- for selected_file in selected_files:
- file_id = selected_file.get("id")
- file_name = selected_file.get("name", "Unknown")
-
- if not file_id:
- continue
-
- # Add file info (we'll fetch content later during indexing)
- all_files.append(
- {
- "id": file_id,
- "name": file_name,
- "mimeType": "", # Will be determined later
- }
- )
- else:
- # No selection specified - fetch all files (original behavior)
- page_token = None
-
- while len(all_files) < max_items:
- files, next_token, error = await composio_connector.list_drive_files(
- page_token=page_token,
- page_size=min(100, max_items - len(all_files)),
- )
-
- if error:
- await task_logger.log_task_failure(
- log_entry, f"Failed to fetch Drive files: {error}", {}
- )
- return 0, f"Failed to fetch Drive files: {error}"
-
- all_files.extend(files)
-
- if not next_token:
- break
- page_token = next_token
-
- if not all_files:
- success_msg = "No Google Drive files found"
- await task_logger.log_task_success(
- log_entry, success_msg, {"files_count": 0}
+ # Route to delta sync or full scan
+ if use_delta_sync:
+ logger.info(f"Using delta sync for Composio Google Drive connector {connector_id}")
+ await task_logger.log_task_progress(
+ log_entry,
+ f"Starting delta sync for Google Drive via Composio (connector {connector_id})",
+ {"stage": "delta_sync", "token": stored_page_token[:20] + "..."},
+ )
+
+ documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_delta_sync(
+ session=session,
+ composio_connector=composio_connector,
+ connector_id=connector_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ page_token=stored_page_token,
+ max_items=max_items,
+ task_logger=task_logger,
+ log_entry=log_entry,
+ )
+ else:
+ logger.info(f"Using full scan for Composio Google Drive connector {connector_id} (first sync or no token)")
+ await task_logger.log_task_progress(
+ log_entry,
+ f"Fetching Google Drive files via Composio for connector {connector_id}",
+ {
+ "stage": "full_scan",
+ "selected_folders": len(selected_folders),
+ "selected_files": len(selected_files),
+ },
+ )
+
+ documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_full_scan(
+ session=session,
+ composio_connector=composio_connector,
+ connector_id=connector_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ selected_folders=selected_folders,
+ selected_files=selected_files,
+ max_files_per_folder=max_files_per_folder,
+ include_subfolders=include_subfolders,
+ max_items=max_items,
+ task_logger=task_logger,
+ log_entry=log_entry,
)
- # CRITICAL: Update timestamp even when no files found so Electric SQL syncs and UI shows indexed status
- await update_connector_last_indexed(session, connector, update_last_indexed)
- await session.commit()
- return (
- 0,
- None,
- ) # Return None (not error) when no items found - this is success with 0 items
- logger.info(f"Found {len(all_files)} Google Drive files to index via Composio")
+ # Get new page token for next sync (always update after successful sync)
+ new_token, token_error = await composio_connector.get_drive_start_page_token()
+ if new_token and not token_error:
+ from sqlalchemy.orm.attributes import flag_modified
+
+ # Refresh connector to avoid stale state
+ await session.refresh(connector)
+
+ if not connector.config:
+ connector.config = {}
+ connector.config["drive_page_token"] = new_token
+ flag_modified(connector, "config")
+ logger.info(f"Updated drive_page_token for connector {connector_id}")
+ elif token_error:
+ logger.warning(f"Failed to get new page token: {token_error}")
- documents_indexed = 0
- documents_skipped = 0
- processing_errors = []
-
- for file_info in all_files:
- try:
- # Handle both standard Google API and potential Composio variations
- file_id = file_info.get("id", "") or file_info.get("fileId", "")
- file_name = (
- file_info.get("name", "")
- or file_info.get("fileName", "")
- or "Untitled"
- )
- mime_type = file_info.get("mimeType", "") or file_info.get(
- "mime_type", ""
- )
-
- if not file_id:
- documents_skipped += 1
- continue
-
- # Skip folders
- if mime_type == "application/vnd.google-apps.folder":
- continue
-
- # Generate unique identifier hash
- document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
- unique_identifier_hash = generate_unique_identifier_hash(
- document_type, f"drive_{file_id}", search_space_id
- )
-
- # Check if document exists
- existing_document = await check_document_by_unique_identifier(
- session, unique_identifier_hash
- )
-
- # Get file content
- (
- content,
- content_error,
- ) = await composio_connector.get_drive_file_content(file_id)
-
- if content_error or not content:
- logger.warning(
- f"Could not get content for file {file_name}: {content_error}"
- )
- # Use metadata as content fallback
- markdown_content = f"# {file_name}\n\n"
- markdown_content += f"**File ID:** {file_id}\n"
- markdown_content += f"**Type:** {mime_type}\n"
- elif isinstance(content, dict):
- # Safety check: if content is still a dict, log error and use fallback
- error_msg = f"Unexpected dict content format for file {file_name}: {list(content.keys())}"
- logger.error(error_msg)
- processing_errors.append(error_msg)
- markdown_content = f"# {file_name}\n\n"
- markdown_content += f"**File ID:** {file_id}\n"
- markdown_content += f"**Type:** {mime_type}\n"
- else:
- # Process content based on file type
- markdown_content = await _process_file_content(
- content=content,
- file_name=file_name,
- file_id=file_id,
- mime_type=mime_type,
- search_space_id=search_space_id,
- user_id=user_id,
- session=session,
- task_logger=task_logger,
- log_entry=log_entry,
- processing_errors=processing_errors,
- )
-
- content_hash = generate_content_hash(markdown_content, search_space_id)
-
- if existing_document:
- if existing_document.content_hash == content_hash:
- documents_skipped += 1
- continue
-
- # Update existing document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "file_id": file_id,
- "file_name": file_name,
- "mime_type": mime_type,
- "document_type": "Google Drive File (Composio)",
- }
- (
- summary_content,
- summary_embedding,
- ) = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = (
- f"Google Drive File: {file_name}\n\nType: {mime_type}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
-
- chunks = await create_document_chunks(markdown_content)
-
- existing_document.title = f"Drive: {file_name}"
- existing_document.content = summary_content
- existing_document.content_hash = content_hash
- existing_document.embedding = summary_embedding
- existing_document.document_metadata = {
- "file_id": file_id,
- "file_name": file_name,
- "mime_type": mime_type,
- "connector_id": connector_id,
- "source": "composio",
- }
- existing_document.chunks = chunks
- existing_document.updated_at = get_current_timestamp()
-
- documents_indexed += 1
-
- # Batch commit every 10 documents
- if documents_indexed % 10 == 0:
- logger.info(
- f"Committing batch: {documents_indexed} Google Drive files processed so far"
- )
- await session.commit()
- continue
-
- # Create new document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "file_id": file_id,
- "file_name": file_name,
- "mime_type": mime_type,
- "document_type": "Google Drive File (Composio)",
- }
- (
- summary_content,
- summary_embedding,
- ) = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = (
- f"Google Drive File: {file_name}\n\nType: {mime_type}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
-
- chunks = await create_document_chunks(markdown_content)
-
- document = Document(
- search_space_id=search_space_id,
- title=f"Drive: {file_name}",
- document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
- document_metadata={
- "file_id": file_id,
- "file_name": file_name,
- "mime_type": mime_type,
- "connector_id": connector_id,
- "toolkit_id": "googledrive",
- "source": "composio",
- },
- content=summary_content,
- content_hash=content_hash,
- unique_identifier_hash=unique_identifier_hash,
- embedding=summary_embedding,
- chunks=chunks,
- updated_at=get_current_timestamp(),
- )
- session.add(document)
- documents_indexed += 1
-
- # Batch commit every 10 documents
- if documents_indexed % 10 == 0:
- logger.info(
- f"Committing batch: {documents_indexed} Google Drive files processed so far"
- )
- await session.commit()
-
- except Exception as e:
- error_msg = (
- f"Error processing Drive file {file_name or 'unknown'}: {e!s}"
- )
- logger.error(error_msg, exc_info=True)
- processing_errors.append(error_msg)
- documents_skipped += 1
- continue
-
- # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
- # This ensures the UI shows "Last indexed" instead of "Never indexed"
+ # CRITICAL: Always update timestamp so Electric SQL syncs and UI shows indexed status
await update_connector_last_indexed(session, connector, update_last_indexed)
- # Final commit to ensure all documents are persisted (safety net)
- # This matches the pattern used in non-Composio Gmail indexer
- logger.info(
- f"Final commit: Total {documents_indexed} Google Drive files processed"
- )
+ # Final commit
+ logger.info(f"Final commit: Total {documents_indexed} Google Drive files processed")
await session.commit()
- logger.info(
- "Successfully committed all Composio Google Drive document changes to database"
- )
+ logger.info("Successfully committed all Composio Google Drive document changes to database")
- # If there were processing errors, return them so notification can show them
+ # Handle processing errors
error_message = None
if processing_errors:
- # Combine all errors into a single message
if len(processing_errors) == 1:
error_message = processing_errors[0]
else:
@@ -934,6 +672,7 @@ async def _index_composio_google_drive(
{
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
+ "sync_type": "delta" if use_delta_sync else "full",
"errors": processing_errors,
},
)
@@ -944,6 +683,7 @@ async def _index_composio_google_drive(
{
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
+ "sync_type": "delta" if use_delta_sync else "full",
},
)
@@ -954,6 +694,469 @@ async def _index_composio_google_drive(
return 0, f"Failed to index Google Drive via Composio: {e!s}"
+async def _index_composio_drive_delta_sync(
+ session: AsyncSession,
+ composio_connector: ComposioConnector,
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ page_token: str,
+ max_items: int,
+ task_logger: TaskLoggingService,
+ log_entry,
+) -> tuple[int, int, list[str]]:
+ """Index Google Drive files using delta sync (only changed files).
+
+ Uses GOOGLEDRIVE_LIST_CHANGES to fetch only files that changed since last sync.
+ Handles: new files, modified files, and deleted files.
+ """
+ documents_indexed = 0
+ documents_skipped = 0
+ processing_errors = []
+
+ # Fetch all changes with pagination
+ all_changes = []
+ current_token = page_token
+
+ while len(all_changes) < max_items:
+ changes, next_token, error = await composio_connector.list_drive_changes(
+ page_token=current_token,
+ page_size=100,
+ include_removed=True,
+ )
+
+ if error:
+ logger.error(f"Error fetching Drive changes: {error}")
+ processing_errors.append(f"Failed to fetch changes: {error}")
+ break
+
+ all_changes.extend(changes)
+
+ if not next_token or next_token == current_token:
+ break
+ current_token = next_token
+
+ if not all_changes:
+ logger.info("No changes detected since last sync")
+ return 0, 0, []
+
+ logger.info(f"Processing {len(all_changes)} changes from delta sync")
+
+ for change in all_changes[:max_items]:
+ try:
+ # Handle removed files
+ is_removed = change.get("removed", False)
+ file_info = change.get("file", {})
+ file_id = change.get("fileId") or file_info.get("id", "")
+
+ if not file_id:
+ documents_skipped += 1
+ continue
+
+ # Check if file was trashed or removed
+ if is_removed or file_info.get("trashed", False):
+ # Remove document from database
+ document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
+ unique_identifier_hash = generate_unique_identifier_hash(
+ document_type, f"drive_{file_id}", search_space_id
+ )
+ existing_document = await check_document_by_unique_identifier(
+ session, unique_identifier_hash
+ )
+ if existing_document:
+ await session.delete(existing_document)
+ documents_indexed += 1
+ logger.info(f"Deleted document for removed/trashed file: {file_id}")
+ continue
+
+ # Process changed file
+ file_name = file_info.get("name", "") or "Untitled"
+ mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
+
+ # Skip folders
+ if mime_type == "application/vnd.google-apps.folder":
+ continue
+
+ # Process the file
+ indexed, skipped, errors = await _process_single_drive_file(
+ session=session,
+ composio_connector=composio_connector,
+ file_id=file_id,
+ file_name=file_name,
+ mime_type=mime_type,
+ connector_id=connector_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ task_logger=task_logger,
+ log_entry=log_entry,
+ )
+
+ documents_indexed += indexed
+ documents_skipped += skipped
+ processing_errors.extend(errors)
+
+ # Batch commit every 10 documents
+ if documents_indexed > 0 and documents_indexed % 10 == 0:
+ await session.commit()
+ logger.info(f"Committed batch: {documents_indexed} changes processed")
+
+ except Exception as e:
+ error_msg = f"Error processing change for file {file_id}: {e!s}"
+ logger.error(error_msg, exc_info=True)
+ processing_errors.append(error_msg)
+ documents_skipped += 1
+
+ logger.info(f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped")
+ return documents_indexed, documents_skipped, processing_errors
+
+
+async def _index_composio_drive_full_scan(
+ session: AsyncSession,
+ composio_connector: ComposioConnector,
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ selected_folders: list[dict],
+ selected_files: list[dict],
+ max_files_per_folder: int,
+ include_subfolders: bool,
+ max_items: int,
+ task_logger: TaskLoggingService,
+ log_entry,
+) -> tuple[int, int, list[str]]:
+ """Index Google Drive files using full scan (first sync or when no delta token)."""
+ documents_indexed = 0
+ documents_skipped = 0
+ processing_errors = []
+
+ all_files = []
+
+ # If specific folders/files are selected, fetch from those
+ if selected_folders or selected_files:
+ # Fetch files from selected folders
+ for folder in selected_folders:
+ folder_id = folder.get("id")
+ folder_name = folder.get("name", "Unknown")
+
+ if not folder_id:
+ continue
+
+ # Handle special case for "root" folder
+ actual_folder_id = None if folder_id == "root" else folder_id
+
+ logger.info(f"Fetching files from folder: {folder_name} ({folder_id})")
+
+ # Fetch files from this folder
+ folder_files = []
+ page_token = None
+
+ while len(folder_files) < max_files_per_folder:
+ (
+ files,
+ next_token,
+ error,
+ ) = await composio_connector.list_drive_files(
+ folder_id=actual_folder_id,
+ page_token=page_token,
+ page_size=min(100, max_files_per_folder - len(folder_files)),
+ )
+
+ if error:
+ logger.warning(
+ f"Failed to fetch files from folder {folder_name}: {error}"
+ )
+ break
+
+ # Process files
+ for file_info in files:
+ mime_type = file_info.get("mimeType", "") or file_info.get(
+ "mime_type", ""
+ )
+
+ # If it's a folder and include_subfolders is enabled, recursively fetch
+ if mime_type == "application/vnd.google-apps.folder":
+ if include_subfolders:
+ # Add subfolder files recursively
+ subfolder_files = await _fetch_folder_files_recursively(
+ composio_connector,
+ file_info.get("id"),
+ max_files=max_files_per_folder,
+ current_count=len(folder_files),
+ )
+ folder_files.extend(subfolder_files)
+ else:
+ folder_files.append(file_info)
+
+ if not next_token:
+ break
+ page_token = next_token
+
+ all_files.extend(folder_files[:max_files_per_folder])
+ logger.info(f"Found {len(folder_files)} files in folder {folder_name}")
+
+ # Add specifically selected files
+ for selected_file in selected_files:
+ file_id = selected_file.get("id")
+ file_name = selected_file.get("name", "Unknown")
+
+ if not file_id:
+ continue
+
+ # Add file info (we'll fetch content later during indexing)
+ all_files.append(
+ {
+ "id": file_id,
+ "name": file_name,
+ "mimeType": "", # Will be determined later
+ }
+ )
+ else:
+ # No selection specified - fetch all files (original behavior)
+ page_token = None
+
+ while len(all_files) < max_items:
+ files, next_token, error = await composio_connector.list_drive_files(
+ page_token=page_token,
+ page_size=min(100, max_items - len(all_files)),
+ )
+
+ if error:
+ return 0, 0, [f"Failed to fetch Drive files: {error}"]
+
+ all_files.extend(files)
+
+ if not next_token:
+ break
+ page_token = next_token
+
+ if not all_files:
+ logger.info("No Google Drive files found")
+ return 0, 0, []
+
+ logger.info(f"Found {len(all_files)} Google Drive files to index via Composio (full scan)")
+
+ for file_info in all_files:
+ try:
+ # Handle both standard Google API and potential Composio variations
+ file_id = file_info.get("id", "") or file_info.get("fileId", "")
+ file_name = (
+ file_info.get("name", "")
+ or file_info.get("fileName", "")
+ or "Untitled"
+ )
+ mime_type = file_info.get("mimeType", "") or file_info.get(
+ "mime_type", ""
+ )
+
+ if not file_id:
+ documents_skipped += 1
+ continue
+
+ # Skip folders
+ if mime_type == "application/vnd.google-apps.folder":
+ continue
+
+ # Process the file
+ indexed, skipped, errors = await _process_single_drive_file(
+ session=session,
+ composio_connector=composio_connector,
+ file_id=file_id,
+ file_name=file_name,
+ mime_type=mime_type,
+ connector_id=connector_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ task_logger=task_logger,
+ log_entry=log_entry,
+ )
+
+ documents_indexed += indexed
+ documents_skipped += skipped
+ processing_errors.extend(errors)
+
+ # Batch commit every 10 documents
+ if documents_indexed > 0 and documents_indexed % 10 == 0:
+ logger.info(f"Committing batch: {documents_indexed} Google Drive files processed so far")
+ await session.commit()
+
+ except Exception as e:
+ error_msg = f"Error processing Drive file {file_name or 'unknown'}: {e!s}"
+ logger.error(error_msg, exc_info=True)
+ processing_errors.append(error_msg)
+ documents_skipped += 1
+
+ logger.info(f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped")
+ return documents_indexed, documents_skipped, processing_errors
+
+
+async def _process_single_drive_file(
+ session: AsyncSession,
+ composio_connector: ComposioConnector,
+ file_id: str,
+ file_name: str,
+ mime_type: str,
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ task_logger: TaskLoggingService,
+ log_entry,
+) -> tuple[int, int, list[str]]:
+ """Process a single Google Drive file for indexing.
+
+ Returns:
+ Tuple of (documents_indexed, documents_skipped, processing_errors)
+ """
+ processing_errors = []
+
+ # Generate unique identifier hash
+ document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
+ unique_identifier_hash = generate_unique_identifier_hash(
+ document_type, f"drive_{file_id}", search_space_id
+ )
+
+ # Check if document exists
+ existing_document = await check_document_by_unique_identifier(
+ session, unique_identifier_hash
+ )
+
+ # Get file content
+ content, content_error = await composio_connector.get_drive_file_content(file_id)
+
+ if content_error or not content:
+ logger.warning(
+ f"Could not get content for file {file_name}: {content_error}"
+ )
+ # Use metadata as content fallback
+ markdown_content = f"# {file_name}\n\n"
+ markdown_content += f"**File ID:** {file_id}\n"
+ markdown_content += f"**Type:** {mime_type}\n"
+ elif isinstance(content, dict):
+ # Safety check: if content is still a dict, log error and use fallback
+ error_msg = f"Unexpected dict content format for file {file_name}: {list(content.keys())}"
+ logger.error(error_msg)
+ processing_errors.append(error_msg)
+ markdown_content = f"# {file_name}\n\n"
+ markdown_content += f"**File ID:** {file_id}\n"
+ markdown_content += f"**Type:** {mime_type}\n"
+ else:
+ # Process content based on file type
+ markdown_content = await _process_file_content(
+ content=content,
+ file_name=file_name,
+ file_id=file_id,
+ mime_type=mime_type,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ session=session,
+ task_logger=task_logger,
+ log_entry=log_entry,
+ processing_errors=processing_errors,
+ )
+
+ content_hash = generate_content_hash(markdown_content, search_space_id)
+
+ if existing_document:
+ if existing_document.content_hash == content_hash:
+ return 0, 1, processing_errors # Skipped
+
+ # Update existing document
+ user_llm = await get_user_long_context_llm(
+ session, user_id, search_space_id
+ )
+
+ if user_llm:
+ document_metadata = {
+ "file_id": file_id,
+ "file_name": file_name,
+ "mime_type": mime_type,
+ "document_type": "Google Drive File (Composio)",
+ }
+ (
+ summary_content,
+ summary_embedding,
+ ) = await generate_document_summary(
+ markdown_content, user_llm, document_metadata
+ )
+ else:
+ summary_content = (
+ f"Google Drive File: {file_name}\n\nType: {mime_type}"
+ )
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
+
+ chunks = await create_document_chunks(markdown_content)
+
+ existing_document.title = f"Drive: {file_name}"
+ existing_document.content = summary_content
+ existing_document.content_hash = content_hash
+ existing_document.embedding = summary_embedding
+ existing_document.document_metadata = {
+ "file_id": file_id,
+ "file_name": file_name,
+ "FILE_NAME": file_name, # For compatibility
+ "mime_type": mime_type,
+ "connector_id": connector_id,
+ "source": "composio",
+ }
+ existing_document.chunks = chunks
+ existing_document.updated_at = get_current_timestamp()
+
+ return 1, 0, processing_errors # Indexed
+
+ # Create new document
+ user_llm = await get_user_long_context_llm(
+ session, user_id, search_space_id
+ )
+
+ if user_llm:
+ document_metadata = {
+ "file_id": file_id,
+ "file_name": file_name,
+ "mime_type": mime_type,
+ "document_type": "Google Drive File (Composio)",
+ }
+ (
+ summary_content,
+ summary_embedding,
+ ) = await generate_document_summary(
+ markdown_content, user_llm, document_metadata
+ )
+ else:
+ summary_content = (
+ f"Google Drive File: {file_name}\n\nType: {mime_type}"
+ )
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
+
+ chunks = await create_document_chunks(markdown_content)
+
+ document = Document(
+ search_space_id=search_space_id,
+ title=f"Drive: {file_name}",
+ document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
+ document_metadata={
+ "file_id": file_id,
+ "file_name": file_name,
+ "FILE_NAME": file_name, # For compatibility
+ "mime_type": mime_type,
+ "connector_id": connector_id,
+ "toolkit_id": "googledrive",
+ "source": "composio",
+ },
+ content=summary_content,
+ content_hash=content_hash,
+ unique_identifier_hash=unique_identifier_hash,
+ embedding=summary_embedding,
+ chunks=chunks,
+ updated_at=get_current_timestamp(),
+ )
+ session.add(document)
+
+ return 1, 0, processing_errors # Indexed
+
+
async def _fetch_folder_files_recursively(
composio_connector: ComposioConnector,
folder_id: str,
@@ -1271,11 +1474,18 @@ async def _index_composio_gmail(
if end_date == "undefined" or end_date == "":
end_date = None
- # Calculate date range with defaults (uses last_indexed_at or 365 days back)
- # This ensures indexing works even when user doesn't specify dates
- start_date_str, end_date_str = calculate_date_range(
- connector, start_date, end_date, default_days_back=365
- )
+ # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
+ # This ensures user-selected dates are respected (matching non-Composio Gmail connector behavior)
+ if start_date is not None and end_date is not None:
+ # User provided both dates - use them directly
+ start_date_str = start_date
+ end_date_str = end_date
+ else:
+ # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+ # This ensures indexing works even when user doesn't specify dates
+ start_date_str, end_date_str = calculate_date_range(
+ connector, start_date, end_date, default_days_back=365
+ )
# Build query with date range
query_parts = []
@@ -1468,11 +1678,18 @@ async def _index_composio_google_calendar(
if end_date == "undefined" or end_date == "":
end_date = None
- # Calculate date range with defaults (uses last_indexed_at or 365 days back)
- # This ensures indexing works even when user doesn't specify dates
- start_date_str, end_date_str = calculate_date_range(
- connector, start_date, end_date, default_days_back=365
- )
+ # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
+ # This ensures user-selected dates are respected (matching non-Composio Calendar connector behavior)
+ if start_date is not None and end_date is not None:
+ # User provided both dates - use them directly
+ start_date_str = start_date
+ end_date_str = end_date
+ else:
+ # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+ # This ensures indexing works even when user doesn't specify dates
+ start_date_str, end_date_str = calculate_date_range(
+ connector, start_date, end_date, default_days_back=365
+ )
# Build time range for API call
time_min = f"{start_date_str}T00:00:00Z"
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
index d277a84ee..d9a894e5a 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@@ -209,7 +209,7 @@ export function RowActions({
disabled={isDeleting}
className="bg-destructive text-destructive-foreground hover:bg-destructive/90"
>
- {isDeleting ? "Deleting..." : "Delete"}
+ {isDeleting ? "Deleting" : "Delete"}
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index a1108f7c8..045c3c586 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -16,7 +16,7 @@ import { ConnectorDialogHeader } from "./connector-popup/components/connector-di
import { ConnectorConnectView } from "./connector-popup/connector-configs/views/connector-connect-view";
import { ConnectorEditView } from "./connector-popup/connector-configs/views/connector-edit-view";
import { IndexingConfigurationView } from "./connector-popup/connector-configs/views/indexing-configuration-view";
-import { OAUTH_CONNECTORS } from "./connector-popup/constants/connector-constants";
+import { COMPOSIO_CONNECTORS, OAUTH_CONNECTORS } from "./connector-popup/constants/connector-constants";
import { useConnectorDialog } from "./connector-popup/hooks/use-connector-dialog";
import { useIndexingConnectors } from "./connector-popup/hooks/use-indexing-connectors";
import { ActiveConnectorsTab } from "./connector-popup/tabs/active-connectors-tab";
@@ -196,9 +196,14 @@ export const ConnectorIndicator: FC = () => {
onBack={handleBackFromAccountsList}
onManage={handleStartEdit}
onAddAccount={() => {
- const oauthConnector = OAUTH_CONNECTORS.find(
- (c) => c.connectorType === viewingAccountsType.connectorType
- );
+ // Check both OAUTH_CONNECTORS and COMPOSIO_CONNECTORS
+ const oauthConnector =
+ OAUTH_CONNECTORS.find(
+ (c) => c.connectorType === viewingAccountsType.connectorType
+ ) ||
+ COMPOSIO_CONNECTORS.find(
+ (c) => c.connectorType === viewingAccountsType.connectorType
+ );
if (oauthConnector) {
handleConnectOAuth(oauthConnector);
}
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index 71258a519..234898922 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -341,7 +341,7 @@ export const ConnectorEditView: FC = ({
{isSaving ? (
<>
- Saving...
+ Saving
>
) : (
"Save Changes"
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
index ea489aec8..68fc688c3 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
@@ -9,7 +9,11 @@ import { getConnectorTypeDisplay } from "@/lib/connectors/utils";
import { cn } from "@/lib/utils";
import { DateRangeSelector } from "../../components/date-range-selector";
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
-import { type IndexingConfigState, OAUTH_CONNECTORS } from "../../constants/connector-constants";
+import {
+ COMPOSIO_CONNECTORS,
+ type IndexingConfigState,
+ OAUTH_CONNECTORS,
+} from "../../constants/connector-constants";
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
import { getConnectorConfigComponent } from "../index";
@@ -91,7 +95,10 @@ export const IndexingConfigurationView: FC = ({
};
}, [checkScrollState]);
- const authConnector = OAUTH_CONNECTORS.find((c) => c.connectorType === connector?.connector_type);
+ // Check both OAUTH_CONNECTORS and COMPOSIO_CONNECTORS
+ const authConnector =
+ OAUTH_CONNECTORS.find((c) => c.connectorType === connector?.connector_type) ||
+ COMPOSIO_CONNECTORS.find((c) => c.connectorType === connector?.connector_type);
return (
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 2923ab823..a2b1168bd 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -191,7 +191,10 @@ export const useConnectorDialog = () => {
// Handle configure view (for page refresh support)
if (params.view === "configure" && params.connector && !indexingConfig && allConnectors) {
- const oauthConnector = OAUTH_CONNECTORS.find((c) => c.id === params.connector);
+ // Check both OAUTH_CONNECTORS and COMPOSIO_CONNECTORS
+ const oauthConnector =
+ OAUTH_CONNECTORS.find((c) => c.id === params.connector) ||
+ COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);
if (oauthConnector) {
let existingConnector: SearchSourceConnector | undefined;
if (params.connectorId) {
diff --git a/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx b/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx
index a518d63a6..e45888bb1 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx
@@ -13,7 +13,7 @@ import type { SearchSourceConnector } from "@/contracts/types/connector.types";
import type { LogActiveTask, LogSummary } from "@/contracts/types/log.types";
import { connectorsApiService } from "@/lib/apis/connectors-api.service";
import { cn } from "@/lib/utils";
-import { OAUTH_CONNECTORS } from "../constants/connector-constants";
+import { COMPOSIO_CONNECTORS, OAUTH_CONNECTORS } from "../constants/connector-constants";
import { getDocumentCountForConnector } from "../utils/connector-document-mapping";
interface ActiveConnectorsTabProps {
@@ -113,7 +113,10 @@ export const ActiveConnectorsTab: FC
= ({
// Get display info for OAuth connector type
const getOAuthConnectorTypeInfo = (connectorType: string) => {
- const oauthConnector = OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType);
+ // Check both OAUTH_CONNECTORS and COMPOSIO_CONNECTORS
+ const oauthConnector =
+ OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) ||
+ COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType);
return {
title:
oauthConnector?.title ||
diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx
index ba4c4970c..c41a2d3bf 100644
--- a/surfsense_web/components/settings/llm-role-manager.tsx
+++ b/surfsense_web/components/settings/llm-role-manager.tsx
@@ -398,7 +398,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
className="flex items-center gap-2 text-xs md:text-sm h-9 md:h-10"
>
- {isSaving ? "Saving..." : "Save Changes"}
+ {isSaving ? "Saving" : "Save Changes"}
{
try {
if (editingConfig) {
+ const { search_space_id, ...updateData } = formData;
await updateConfig({
id: editingConfig.id,
- data: {
- ...formData,
- search_space_id: undefined, // Can't change search_space_id
- },
+ data: updateData,
});
} else {
await createConfig(formData);
@@ -156,9 +152,6 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
setEditingConfig(null);
};
- const getProviderInfo = (providerValue: string) =>
- LLM_PROVIDERS.find((p) => p.value === providerValue);
-
return (
{/* Header */}
@@ -180,9 +173,9 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
{/* Error Alerts */}
{errors.length > 0 &&
- errors.map((err, i) => (
+ errors.map((err) => (
{configs?.map((config) => {
- const providerInfo = getProviderInfo(config.provider);
return (
- Deleting...
+ Deleting
>
) : (
<>
diff --git a/surfsense_web/components/settings/prompt-config-manager.tsx b/surfsense_web/components/settings/prompt-config-manager.tsx
index 7c5a8978b..64f6adf23 100644
--- a/surfsense_web/components/settings/prompt-config-manager.tsx
+++ b/surfsense_web/components/settings/prompt-config-manager.tsx
@@ -200,7 +200,7 @@ export function PromptConfigManager({ searchSpaceId }: PromptConfigManagerProps)
className="flex items-center gap-2 text-xs md:text-sm h-9 md:h-10"
>
- {saving ? "Saving..." : "Save Instructions"}
+ {saving ? "Saving" : "Save Instructions"}
diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json
index 94e44c8ec..8ca382669 100644
--- a/surfsense_web/messages/en.json
+++ b/surfsense_web/messages/en.json
@@ -157,7 +157,7 @@
"delete_note": "Delete Note",
"delete_note_confirm": "Are you sure you want to delete",
"action_cannot_undone": "This action cannot be undone.",
- "deleting": "Deleting...",
+ "deleting": "Deleting",
"surfsense_dashboard": "SurfSense Dashboard",
"welcome_message": "Welcome to your SurfSense dashboard.",
"your_search_spaces": "Your Search Spaces",
@@ -498,7 +498,7 @@
"base": "Base",
"all_roles_assigned": "All roles are assigned and ready to use! Your LLM configuration is complete.",
"save_changes": "Save Changes",
- "saving": "Saving...",
+ "saving": "Saving",
"reset": "Reset",
"status": "Status",
"status_ready": "Ready",
@@ -548,7 +548,7 @@
"log_deleted_error": "Failed to delete log",
"confirm_delete_log_title": "Are you sure?",
"confirm_delete_log_desc": "This action cannot be undone. This will permanently delete the log entry.",
- "deleting": "Deleting..."
+ "deleting": "Deleting"
},
"onboard": {
"welcome_title": "Welcome to SurfSense",
From 8d8f69545ee869242fe27fb2f4d4512429cdb240 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 18:57:10 +0530
Subject: [PATCH 11/51] feat: improve Google Calendar and Gmail connectors with
enhanced error handling
- Added user-friendly re-authentication messages for expired or revoked tokens in both Google Calendar and Gmail connectors.
- Updated error handling in indexing tasks to log specific authentication errors and provide clearer feedback to users.
- Enhanced the connector UI to handle indexing failures more effectively, improving overall user experience.
---
.../connectors/google_calendar_connector.py | 14 +++++++
.../app/connectors/google_gmail_connector.py | 14 +++++++
.../google_calendar_indexer.py | 15 +++++--
.../google_gmail_indexer.py | 14 ++++++-
.../assistant-ui/connector-popup.tsx | 18 ++++++--
.../views/connector-edit-view.tsx | 2 +-
.../hooks/use-connector-dialog.ts | 6 ++-
.../hooks/use-indexing-connectors.ts | 42 ++++++++++++++++++-
8 files changed, 113 insertions(+), 12 deletions(-)
diff --git a/surfsense_backend/app/connectors/google_calendar_connector.py b/surfsense_backend/app/connectors/google_calendar_connector.py
index 6d389ddd5..ac60b02a8 100644
--- a/surfsense_backend/app/connectors/google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/google_calendar_connector.py
@@ -142,6 +142,12 @@ class GoogleCalendarConnector:
flag_modified(connector, "config")
await self._session.commit()
except Exception as e:
+ error_str = str(e)
+ # Check if this is an invalid_grant error (token expired/revoked)
+ if "invalid_grant" in error_str.lower() or "token has been expired or revoked" in error_str.lower():
+ raise Exception(
+ "Google Calendar authentication failed. Please re-authenticate."
+ ) from e
raise Exception(
f"Failed to refresh Google OAuth credentials: {e!s}"
) from e
@@ -165,6 +171,10 @@ class GoogleCalendarConnector:
self.service = build("calendar", "v3", credentials=credentials)
return self.service
except Exception as e:
+ error_str = str(e)
+ # If the error already contains a user-friendly re-authentication message, preserve it
+ if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ raise Exception(error_str) from e
raise Exception(f"Failed to create Google Calendar service: {e!s}") from e
async def get_calendars(self) -> tuple[list[dict[str, Any]], str | None]:
@@ -271,6 +281,10 @@ class GoogleCalendarConnector:
return events, None
except Exception as e:
+ error_str = str(e)
+ # If the error already contains a user-friendly re-authentication message, preserve it
+ if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ return [], error_str
return [], f"Error fetching events: {e!s}"
def format_event_to_markdown(self, event: dict[str, Any]) -> str:
diff --git a/surfsense_backend/app/connectors/google_gmail_connector.py b/surfsense_backend/app/connectors/google_gmail_connector.py
index 10008ad73..8c0e4690e 100644
--- a/surfsense_backend/app/connectors/google_gmail_connector.py
+++ b/surfsense_backend/app/connectors/google_gmail_connector.py
@@ -141,6 +141,12 @@ class GoogleGmailConnector:
flag_modified(connector, "config")
await self._session.commit()
except Exception as e:
+ error_str = str(e)
+ # Check if this is an invalid_grant error (token expired/revoked)
+ if "invalid_grant" in error_str.lower() or "token has been expired or revoked" in error_str.lower():
+ raise Exception(
+ "Gmail authentication failed. Please re-authenticate."
+ ) from e
raise Exception(
f"Failed to refresh Google OAuth credentials: {e!s}"
) from e
@@ -164,6 +170,10 @@ class GoogleGmailConnector:
self.service = build("gmail", "v1", credentials=credentials)
return self.service
except Exception as e:
+ error_str = str(e)
+ # If the error already contains a user-friendly re-authentication message, preserve it
+ if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ raise Exception(error_str) from e
raise Exception(f"Failed to create Gmail service: {e!s}") from e
async def get_user_profile(self) -> tuple[dict[str, Any], str | None]:
@@ -225,6 +235,10 @@ class GoogleGmailConnector:
return messages, None
except Exception as e:
+ error_str = str(e)
+ # If the error already contains a user-friendly re-authentication message, preserve it
+ if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ return [], error_str
return [], f"Error fetching messages list: {e!s}"
async def get_message_details(
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index b8c0e564d..09bb8de4b 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -246,13 +246,20 @@ async def index_google_calendar_events(
)
return 0, None
else:
+ # Check if this is an authentication error that requires re-authentication
+ error_message = error
+ error_type = "APIError"
+ if "re-authenticate" in error.lower() or "expired or been revoked" in error.lower() or "authentication failed" in error.lower():
+ error_message = "Google Calendar authentication failed. Please re-authenticate."
+ error_type = "AuthenticationError"
+
await task_logger.log_task_failure(
log_entry,
- f"Failed to get Google Calendar events: {error}",
- "API Error",
- {"error_type": "APIError"},
+ error_message,
+ error,
+ {"error_type": error_type},
)
- return 0, f"Failed to get Google Calendar events: {error}"
+ return 0, error_message
logger.info(f"Retrieved {len(events)} events from Google Calendar API")
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
index e10297057..6a3057437 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@@ -170,10 +170,20 @@ async def index_google_gmail_messages(
)
if error:
+ # Check if this is an authentication error that requires re-authentication
+ error_message = error
+ error_type = "APIError"
+ if "re-authenticate" in error.lower() or "expired or been revoked" in error.lower() or "authentication failed" in error.lower():
+ error_message = "Gmail authentication failed. Please re-authenticate."
+ error_type = "AuthenticationError"
+
await task_logger.log_task_failure(
- log_entry, f"Failed to fetch messages: {error}", {}
+ log_entry,
+ error_message,
+ error,
+ {"error_type": error_type}
)
- return 0, f"Failed to fetch Gmail messages: {error}"
+ return 0, error_message
if not messages:
success_msg = "No Google gmail messages found in the specified date range"
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index 045c3c586..a04e2a9fd 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -5,12 +5,14 @@ import { Cable, Loader2 } from "lucide-react";
import { useSearchParams } from "next/navigation";
import type { FC } from "react";
import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
+import { currentUserAtom } from "@/atoms/user/user-query.atoms";
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
import { Dialog, DialogContent } from "@/components/ui/dialog";
import { Tabs, TabsContent } from "@/components/ui/tabs";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
import { useConnectorsElectric } from "@/hooks/use-connectors-electric";
import { useDocumentsElectric } from "@/hooks/use-documents-electric";
+import { useInbox } from "@/hooks/use-inbox";
import { cn } from "@/lib/utils";
import { ConnectorDialogHeader } from "./connector-popup/components/connector-dialog-header";
import { ConnectorConnectView } from "./connector-popup/connector-configs/views/connector-connect-view";
@@ -27,10 +29,18 @@ import { YouTubeCrawlerView } from "./connector-popup/views/youtube-crawler-view
export const ConnectorIndicator: FC = () => {
const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom);
const searchParams = useSearchParams();
+ const { data: currentUser } = useAtomValue(currentUserAtom);
// Fetch document type counts using Electric SQL + PGlite for real-time updates
const { documentTypeCounts, loading: documentTypesLoading } = useDocumentsElectric(searchSpaceId);
+ // Fetch notifications to detect indexing failures
+ const { inboxItems = [] } = useInbox(
+ currentUser?.id ?? null,
+ searchSpaceId ? Number(searchSpaceId) : null,
+ "connector_indexing"
+ );
+
// Check if YouTube view is active
const isYouTubeView = searchParams.get("view") === "youtube";
@@ -116,8 +126,10 @@ export const ConnectorIndicator: FC = () => {
};
// Track indexing state locally - clears automatically when Electric SQL detects last_indexed_at changed
- const { indexingConnectorIds, startIndexing } = useIndexingConnectors(
- connectors as SearchSourceConnector[]
+ // Also clears when failed notifications are detected
+ const { indexingConnectorIds, startIndexing, stopIndexing } = useIndexingConnectors(
+ connectors as SearchSourceConnector[],
+ inboxItems
);
const isLoading = connectorsLoading || documentTypesLoading;
@@ -246,7 +258,7 @@ export const ConnectorIndicator: FC = () => {
editingConnector.connector_type !== "GOOGLE_DRIVE_CONNECTOR"
? () => {
startIndexing(editingConnector.id);
- handleQuickIndexConnector(editingConnector.id, editingConnector.connector_type);
+ handleQuickIndexConnector(editingConnector.id, editingConnector.connector_type, stopIndexing);
}
: undefined
}
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index 234898922..fbdffed7a 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -97,7 +97,7 @@ export const ConnectorEditView: FC = ({
};
}, [checkScrollState]);
- // Reset local quick indexing state when indexing completes
+ // Reset local quick indexing state when indexing completes or fails
useEffect(() => {
if (!isIndexing) {
setIsQuickIndexing(false);
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index a2b1168bd..f505d8f83 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -1375,7 +1375,7 @@ export const useConnectorDialog = () => {
// Handle quick index (index without date picker, uses backend defaults)
const handleQuickIndexConnector = useCallback(
- async (connectorId: number, connectorType?: string) => {
+ async (connectorId: number, connectorType?: string, stopIndexing?: (id: number) => void) => {
if (!searchSpaceId) return;
// Track quick index clicked event
@@ -1401,6 +1401,10 @@ export const useConnectorDialog = () => {
} catch (error) {
console.error("Error indexing connector content:", error);
toast.error(error instanceof Error ? error.message : "Failed to start indexing");
+ // Stop indexing state on error
+ if (stopIndexing) {
+ stopIndexing(connectorId);
+ }
}
},
[searchSpaceId, indexConnector]
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
index 2ac8d340a..e82a8eb29 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
@@ -2,6 +2,8 @@
import { useCallback, useEffect, useRef, useState } from "react";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
+import type { InboxItem } from "@/contracts/types/inbox.types";
+import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types";
/**
* Hook to track which connectors are currently indexing using local state.
@@ -9,10 +11,14 @@ import type { SearchSourceConnector } from "@/contracts/types/connector.types";
* This provides a better UX than polling by:
* 1. Setting indexing state immediately when user triggers indexing (optimistic)
* 2. Clearing indexing state when Electric SQL detects last_indexed_at changed
+ * 3. Clearing indexing state when a failed notification is detected
*
* The actual `last_indexed_at` value comes from Electric SQL/PGlite, not local state.
*/
-export function useIndexingConnectors(connectors: SearchSourceConnector[]) {
+export function useIndexingConnectors(
+ connectors: SearchSourceConnector[],
+ inboxItems?: InboxItem[]
+) {
// Set of connector IDs that are currently indexing
const [indexingConnectorIds, setIndexingConnectorIds] = useState>(new Set());
@@ -48,6 +54,40 @@ export function useIndexingConnectors(connectors: SearchSourceConnector[]) {
}
}, [connectors, indexingConnectorIds]);
+ // Detect failed notifications and stop indexing state
+ useEffect(() => {
+ if (!inboxItems || inboxItems.length === 0) return;
+
+ const newIndexingIds = new Set(indexingConnectorIds);
+ let hasChanges = false;
+
+ for (const item of inboxItems) {
+ // Only check connector_indexing notifications
+ if (item.type !== "connector_indexing") continue;
+
+ // Check if this notification indicates a failure
+ const metadata = isConnectorIndexingMetadata(item.metadata)
+ ? item.metadata
+ : null;
+ if (!metadata) continue;
+
+ // Check if status is "failed" or if there's an error_message
+ const isFailed =
+ metadata.status === "failed" ||
+ (metadata.error_message && metadata.error_message.trim().length > 0);
+
+ // If failed and connector is in indexing state, clear it
+ if (isFailed && indexingConnectorIds.has(metadata.connector_id)) {
+ newIndexingIds.delete(metadata.connector_id);
+ hasChanges = true;
+ }
+ }
+
+ if (hasChanges) {
+ setIndexingConnectorIds(newIndexingIds);
+ }
+ }, [inboxItems, indexingConnectorIds]);
+
// Add a connector to the indexing set (called when indexing starts)
const startIndexing = useCallback((connectorId: number) => {
setIndexingConnectorIds((prev) => {
From 1343fabeee9cfe7d101e031abb00f3dbd29ad631 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 19:56:19 +0530
Subject: [PATCH 12/51] feat: refactor composio connectors for modularity
---
.../app/connectors/composio_connector.py | 366 +---
.../connectors/composio_gmail_connector.py | 614 ++++++
.../composio_google_calendar_connector.py | 453 ++++
.../composio_google_drive_connector.py | 1162 +++++++++++
.../app/routes/composio_routes.py | 19 +-
.../app/services/composio_service.py | 21 +
.../app/tasks/composio_indexer.py | 1819 +----------------
.../components/composio-connector-card.tsx | 78 -
.../components/composio-calendar-config.tsx | 220 ++
.../components/composio-config.tsx | 353 ----
.../components/composio-drive-config.tsx | 313 +++
.../components/composio-gmail-config.tsx | 174 ++
.../connector-configs/index.tsx | 8 +-
.../views/connector-edit-view.tsx | 4 +-
.../views/indexing-configuration-view.tsx | 20 +-
.../hooks/use-connector-dialog.ts | 113 +-
surfsense_web/lib/connectors/utils.ts | 3 +
17 files changed, 3128 insertions(+), 2612 deletions(-)
create mode 100644 surfsense_backend/app/connectors/composio_gmail_connector.py
create mode 100644 surfsense_backend/app/connectors/composio_google_calendar_connector.py
create mode 100644 surfsense_backend/app/connectors/composio_google_drive_connector.py
delete mode 100644 surfsense_web/components/assistant-ui/connector-popup/components/composio-connector-card.tsx
create mode 100644 surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
delete mode 100644 surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
create mode 100644 surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
create mode 100644 surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
diff --git a/surfsense_backend/app/connectors/composio_connector.py b/surfsense_backend/app/connectors/composio_connector.py
index 8cb91355d..301296378 100644
--- a/surfsense_backend/app/connectors/composio_connector.py
+++ b/surfsense_backend/app/connectors/composio_connector.py
@@ -1,7 +1,7 @@
"""
-Composio Connector Module.
+Composio Connector Base Module.
-Provides a unified interface for interacting with various services via Composio,
+Provides a base class for interacting with various services via Composio,
primarily used during indexing operations.
"""
@@ -19,10 +19,10 @@ logger = logging.getLogger(__name__)
class ComposioConnector:
"""
- Generic Composio connector for data retrieval.
+ Base Composio connector for data retrieval.
Wraps the ComposioService to provide toolkit-specific data access
- for indexing operations.
+ for indexing operations. Subclasses implement toolkit-specific methods.
"""
def __init__(
@@ -89,354 +89,12 @@ class ComposioConnector:
toolkit_id = await self.get_toolkit_id()
return toolkit_id in INDEXABLE_TOOLKITS
- # ===== Google Drive Methods =====
+ @property
+ def session(self) -> AsyncSession:
+ """Get the database session."""
+ return self._session
- async def list_drive_files(
- self,
- folder_id: str | None = None,
- page_token: str | None = None,
- page_size: int = 100,
- ) -> tuple[list[dict[str, Any]], str | None, str | None]:
- """
- List files from Google Drive via Composio.
-
- Args:
- folder_id: Optional folder ID to list contents of.
- page_token: Pagination token.
- page_size: Number of files per page.
-
- Returns:
- Tuple of (files list, next_page_token, error message).
- """
- connected_account_id = await self.get_connected_account_id()
- if not connected_account_id:
- return [], None, "No connected account ID found"
-
- entity_id = await self.get_entity_id()
- service = await self._get_service()
- return await service.get_drive_files(
- connected_account_id=connected_account_id,
- entity_id=entity_id,
- folder_id=folder_id,
- page_token=page_token,
- page_size=page_size,
- )
-
- async def get_drive_file_content(
- self, file_id: str
- ) -> tuple[bytes | None, str | None]:
- """
- Download file content from Google Drive via Composio.
-
- Args:
- file_id: Google Drive file ID.
-
- Returns:
- Tuple of (file content bytes, error message).
- """
- connected_account_id = await self.get_connected_account_id()
- if not connected_account_id:
- return None, "No connected account ID found"
-
- entity_id = await self.get_entity_id()
- service = await self._get_service()
- return await service.get_drive_file_content(
- connected_account_id=connected_account_id,
- entity_id=entity_id,
- file_id=file_id,
- )
-
- async def get_drive_start_page_token(self) -> tuple[str | None, str | None]:
- """
- Get the starting page token for Google Drive change tracking.
-
- Returns:
- Tuple of (start_page_token, error message).
- """
- connected_account_id = await self.get_connected_account_id()
- if not connected_account_id:
- return None, "No connected account ID found"
-
- entity_id = await self.get_entity_id()
- service = await self._get_service()
- return await service.get_drive_start_page_token(
- connected_account_id=connected_account_id,
- entity_id=entity_id,
- )
-
- async def list_drive_changes(
- self,
- page_token: str | None = None,
- page_size: int = 100,
- include_removed: bool = True,
- ) -> tuple[list[dict[str, Any]], str | None, str | None]:
- """
- List changes in Google Drive since the given page token.
-
- Args:
- page_token: Page token from previous sync (optional).
- page_size: Number of changes per page.
- include_removed: Whether to include removed items.
-
- Returns:
- Tuple of (changes list, new_start_page_token, error message).
- """
- connected_account_id = await self.get_connected_account_id()
- if not connected_account_id:
- return [], None, "No connected account ID found"
-
- entity_id = await self.get_entity_id()
- service = await self._get_service()
- return await service.list_drive_changes(
- connected_account_id=connected_account_id,
- entity_id=entity_id,
- page_token=page_token,
- page_size=page_size,
- include_removed=include_removed,
- )
-
- # ===== Gmail Methods =====
-
- async def list_gmail_messages(
- self,
- query: str = "",
- max_results: int = 50,
- page_token: str | None = None,
- ) -> tuple[list[dict[str, Any]], str | None, int | None, str | None]:
- """
- List Gmail messages via Composio with pagination support.
-
- Args:
- query: Gmail search query.
- max_results: Maximum number of messages per page (default: 50).
- page_token: Optional pagination token for next page.
-
- Returns:
- Tuple of (messages list, next_page_token, result_size_estimate, error message).
- """
- connected_account_id = await self.get_connected_account_id()
- if not connected_account_id:
- return [], None, None, "No connected account ID found"
-
- entity_id = await self.get_entity_id()
- service = await self._get_service()
- return await service.get_gmail_messages(
- connected_account_id=connected_account_id,
- entity_id=entity_id,
- query=query,
- max_results=max_results,
- page_token=page_token,
- )
-
- async def get_gmail_message_detail(
- self, message_id: str
- ) -> tuple[dict[str, Any] | None, str | None]:
- """
- Get full details of a Gmail message via Composio.
-
- Args:
- message_id: Gmail message ID.
-
- Returns:
- Tuple of (message details, error message).
- """
- connected_account_id = await self.get_connected_account_id()
- if not connected_account_id:
- return None, "No connected account ID found"
-
- entity_id = await self.get_entity_id()
- service = await self._get_service()
- return await service.get_gmail_message_detail(
- connected_account_id=connected_account_id,
- entity_id=entity_id,
- message_id=message_id,
- )
-
- # ===== Google Calendar Methods =====
-
- async def list_calendar_events(
- self,
- time_min: str | None = None,
- time_max: str | None = None,
- max_results: int = 250,
- ) -> tuple[list[dict[str, Any]], str | None]:
- """
- List Google Calendar events via Composio.
-
- Args:
- time_min: Start time (RFC3339 format).
- time_max: End time (RFC3339 format).
- max_results: Maximum number of events.
-
- Returns:
- Tuple of (events list, error message).
- """
- connected_account_id = await self.get_connected_account_id()
- if not connected_account_id:
- return [], "No connected account ID found"
-
- entity_id = await self.get_entity_id()
- service = await self._get_service()
- return await service.get_calendar_events(
- connected_account_id=connected_account_id,
- entity_id=entity_id,
- time_min=time_min,
- time_max=time_max,
- max_results=max_results,
- )
-
- # ===== Utility Methods =====
-
- def format_gmail_message_to_markdown(self, message: dict[str, Any]) -> str:
- """
- Format a Gmail message to markdown.
-
- Args:
- message: Message object from Composio's GMAIL_FETCH_EMAILS response.
- Composio structure: messageId, messageText, messageTimestamp,
- payload.headers, labelIds, attachmentList
-
- Returns:
- Formatted markdown string.
- """
- try:
- # Composio uses 'messageId' (camelCase)
- message_id = message.get("messageId", "") or message.get("id", "")
- label_ids = message.get("labelIds", [])
-
- # Extract headers from payload
- payload = message.get("payload", {})
- headers = payload.get("headers", [])
-
- # Parse headers into a dict
- header_dict = {}
- for header in headers:
- name = header.get("name", "").lower()
- value = header.get("value", "")
- header_dict[name] = value
-
- # Extract key information
- subject = header_dict.get("subject", "No Subject")
- from_email = header_dict.get("from", "Unknown Sender")
- to_email = header_dict.get("to", "Unknown Recipient")
- # Composio provides messageTimestamp directly
- date_str = message.get("messageTimestamp", "") or header_dict.get(
- "date", "Unknown Date"
- )
-
- # Build markdown content
- markdown_content = f"# {subject}\n\n"
- markdown_content += f"**From:** {from_email}\n"
- markdown_content += f"**To:** {to_email}\n"
- markdown_content += f"**Date:** {date_str}\n"
-
- if label_ids:
- markdown_content += f"**Labels:** {', '.join(label_ids)}\n"
-
- markdown_content += "\n---\n\n"
-
- # Composio provides full message text in 'messageText'
- message_text = message.get("messageText", "")
- if message_text:
- markdown_content += f"## Content\n\n{message_text}\n\n"
- else:
- # Fallback to snippet if no messageText
- snippet = message.get("snippet", "")
- if snippet:
- markdown_content += f"## Preview\n\n{snippet}\n\n"
-
- # Add attachment info if present
- attachments = message.get("attachmentList", [])
- if attachments:
- markdown_content += "## Attachments\n\n"
- for att in attachments:
- att_name = att.get("filename", att.get("name", "Unknown"))
- markdown_content += f"- {att_name}\n"
- markdown_content += "\n"
-
- # Add message metadata
- markdown_content += "## Message Details\n\n"
- markdown_content += f"- **Message ID:** {message_id}\n"
-
- return markdown_content
-
- except Exception as e:
- return f"Error formatting message to markdown: {e!s}"
-
- def format_calendar_event_to_markdown(self, event: dict[str, Any]) -> str:
- """
- Format a Google Calendar event to markdown.
-
- Args:
- event: Event object from Google Calendar API.
-
- Returns:
- Formatted markdown string.
- """
- from datetime import datetime
-
- try:
- # Extract basic event information
- summary = event.get("summary", "No Title")
- description = event.get("description", "")
- location = event.get("location", "")
-
- # Extract start and end times
- start = event.get("start", {})
- end = event.get("end", {})
-
- start_time = start.get("dateTime") or start.get("date", "")
- end_time = end.get("dateTime") or end.get("date", "")
-
- # Format times for display
- def format_time(time_str: str) -> str:
- if not time_str:
- return "Unknown"
- try:
- if "T" in time_str:
- dt = datetime.fromisoformat(time_str.replace("Z", "+00:00"))
- return dt.strftime("%Y-%m-%d %H:%M")
- return time_str
- except Exception:
- return time_str
-
- start_formatted = format_time(start_time)
- end_formatted = format_time(end_time)
-
- # Extract attendees
- attendees = event.get("attendees", [])
- attendee_list = []
- for attendee in attendees:
- email = attendee.get("email", "")
- display_name = attendee.get("displayName", email)
- response_status = attendee.get("responseStatus", "")
- attendee_list.append(f"- {display_name} ({response_status})")
-
- # Build markdown content
- markdown_content = f"# {summary}\n\n"
- markdown_content += f"**Start:** {start_formatted}\n"
- markdown_content += f"**End:** {end_formatted}\n"
-
- if location:
- markdown_content += f"**Location:** {location}\n"
-
- markdown_content += "\n"
-
- if description:
- markdown_content += f"## Description\n\n{description}\n\n"
-
- if attendee_list:
- markdown_content += "## Attendees\n\n"
- markdown_content += "\n".join(attendee_list)
- markdown_content += "\n\n"
-
- # Add event metadata
- markdown_content += "## Event Details\n\n"
- markdown_content += f"- **Event ID:** {event.get('id', 'Unknown')}\n"
- markdown_content += f"- **Created:** {event.get('created', 'Unknown')}\n"
- markdown_content += f"- **Updated:** {event.get('updated', 'Unknown')}\n"
-
- return markdown_content
-
- except Exception as e:
- return f"Error formatting event to markdown: {e!s}"
+ @property
+ def connector_id(self) -> int:
+ """Get the connector ID."""
+ return self._connector_id
diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py
new file mode 100644
index 000000000..5a9645a66
--- /dev/null
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@@ -0,0 +1,614 @@
+"""
+Composio Gmail Connector Module.
+
+Provides Gmail-specific methods for data retrieval and indexing via Composio.
+"""
+
+import logging
+from datetime import UTC, datetime
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
+from sqlalchemy.orm import selectinload
+
+from app.config import config
+from app.connectors.composio_connector import ComposioConnector
+from app.db import Document, DocumentType
+from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
+from app.services.llm_service import get_user_long_context_llm
+from app.services.task_logging_service import TaskLoggingService
+from app.tasks.connector_indexers.base import calculate_date_range
+from app.utils.document_converters import (
+ create_document_chunks,
+ generate_content_hash,
+ generate_document_summary,
+ generate_unique_identifier_hash,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def get_current_timestamp() -> datetime:
+ """Get the current timestamp with timezone for updated_at field."""
+ return datetime.now(UTC)
+
+
+async def check_document_by_unique_identifier(
+ session: AsyncSession, unique_identifier_hash: str
+) -> Document | None:
+ """Check if a document with the given unique identifier hash already exists."""
+ existing_doc_result = await session.execute(
+ select(Document)
+ .options(selectinload(Document.chunks))
+ .where(Document.unique_identifier_hash == unique_identifier_hash)
+ )
+ return existing_doc_result.scalars().first()
+
+
+async def update_connector_last_indexed(
+ session: AsyncSession,
+ connector,
+ update_last_indexed: bool = True,
+) -> None:
+ """Update the last_indexed_at timestamp for a connector."""
+ if update_last_indexed:
+ connector.last_indexed_at = datetime.now(UTC)
+ logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
+
+
+class ComposioGmailConnector(ComposioConnector):
+ """
+ Gmail-specific Composio connector.
+
+ Provides methods for listing messages, getting message details, and formatting
+ Gmail messages from Gmail via Composio.
+ """
+
+ async def list_gmail_messages(
+ self,
+ query: str = "",
+ max_results: int = 50,
+ page_token: str | None = None,
+ ) -> tuple[list[dict[str, Any]], str | None, int | None, str | None]:
+ """
+ List Gmail messages via Composio with pagination support.
+
+ Args:
+ query: Gmail search query.
+ max_results: Maximum number of messages per page (default: 50).
+ page_token: Optional pagination token for next page.
+
+ Returns:
+ Tuple of (messages list, next_page_token, result_size_estimate, error message).
+ """
+ connected_account_id = await self.get_connected_account_id()
+ if not connected_account_id:
+ return [], None, None, "No connected account ID found"
+
+ entity_id = await self.get_entity_id()
+ service = await self._get_service()
+ return await service.get_gmail_messages(
+ connected_account_id=connected_account_id,
+ entity_id=entity_id,
+ query=query,
+ max_results=max_results,
+ page_token=page_token,
+ )
+
+ async def get_gmail_message_detail(
+ self, message_id: str
+ ) -> tuple[dict[str, Any] | None, str | None]:
+ """
+ Get full details of a Gmail message via Composio.
+
+ Args:
+ message_id: Gmail message ID.
+
+ Returns:
+ Tuple of (message details, error message).
+ """
+ connected_account_id = await self.get_connected_account_id()
+ if not connected_account_id:
+ return None, "No connected account ID found"
+
+ entity_id = await self.get_entity_id()
+ service = await self._get_service()
+ return await service.get_gmail_message_detail(
+ connected_account_id=connected_account_id,
+ entity_id=entity_id,
+ message_id=message_id,
+ )
+
+ def format_gmail_message_to_markdown(self, message: dict[str, Any]) -> str:
+ """
+ Format a Gmail message to markdown.
+
+ Args:
+ message: Message object from Composio's GMAIL_FETCH_EMAILS response.
+ Composio structure: messageId, messageText, messageTimestamp,
+ payload.headers, labelIds, attachmentList
+
+ Returns:
+ Formatted markdown string.
+ """
+ try:
+ # Composio uses 'messageId' (camelCase)
+ message_id = message.get("messageId", "") or message.get("id", "")
+ label_ids = message.get("labelIds", [])
+
+ # Extract headers from payload
+ payload = message.get("payload", {})
+ headers = payload.get("headers", [])
+
+ # Parse headers into a dict
+ header_dict = {}
+ for header in headers:
+ name = header.get("name", "").lower()
+ value = header.get("value", "")
+ header_dict[name] = value
+
+ # Extract key information
+ subject = header_dict.get("subject", "No Subject")
+ from_email = header_dict.get("from", "Unknown Sender")
+ to_email = header_dict.get("to", "Unknown Recipient")
+ # Composio provides messageTimestamp directly
+ date_str = message.get("messageTimestamp", "") or header_dict.get(
+ "date", "Unknown Date"
+ )
+
+ # Build markdown content
+ markdown_content = f"# {subject}\n\n"
+ markdown_content += f"**From:** {from_email}\n"
+ markdown_content += f"**To:** {to_email}\n"
+ markdown_content += f"**Date:** {date_str}\n"
+
+ if label_ids:
+ markdown_content += f"**Labels:** {', '.join(label_ids)}\n"
+
+ markdown_content += "\n---\n\n"
+
+ # Composio provides full message text in 'messageText'
+ message_text = message.get("messageText", "")
+ if message_text:
+ markdown_content += f"## Content\n\n{message_text}\n\n"
+ else:
+ # Fallback to snippet if no messageText
+ snippet = message.get("snippet", "")
+ if snippet:
+ markdown_content += f"## Preview\n\n{snippet}\n\n"
+
+ # Add attachment info if present
+ attachments = message.get("attachmentList", [])
+ if attachments:
+ markdown_content += "## Attachments\n\n"
+ for att in attachments:
+ att_name = att.get("filename", att.get("name", "Unknown"))
+ markdown_content += f"- {att_name}\n"
+ markdown_content += "\n"
+
+ # Add message metadata
+ markdown_content += "## Message Details\n\n"
+ markdown_content += f"- **Message ID:** {message_id}\n"
+
+ return markdown_content
+
+ except Exception as e:
+ return f"Error formatting message to markdown: {e!s}"
+
+
+# ============ Indexer Functions ============
+
+
+async def _process_gmail_message_batch(
+ session: AsyncSession,
+ messages: list[dict[str, Any]],
+ composio_connector: ComposioGmailConnector,
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ total_documents_indexed: int = 0,
+) -> tuple[int, int]:
+ """
+ Process a batch of Gmail messages and index them.
+
+ Args:
+ total_documents_indexed: Running total of documents indexed so far (for batch commits).
+
+ Returns:
+ Tuple of (documents_indexed, documents_skipped)
+ """
+ documents_indexed = 0
+ documents_skipped = 0
+
+ for message in messages:
+ try:
+ # Composio uses 'messageId' (camelCase), not 'id'
+ message_id = message.get("messageId", "") or message.get("id", "")
+ if not message_id:
+ documents_skipped += 1
+ continue
+
+ # Composio's GMAIL_FETCH_EMAILS already returns full message content
+ # No need for a separate detail API call
+
+ # Extract message info from Composio response
+ # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
+ payload = message.get("payload", {})
+ headers = payload.get("headers", [])
+
+ subject = "No Subject"
+ sender = "Unknown Sender"
+ date_str = message.get("messageTimestamp", "Unknown Date")
+
+ for header in headers:
+ name = header.get("name", "").lower()
+ value = header.get("value", "")
+ if name == "subject":
+ subject = value
+ elif name == "from":
+ sender = value
+ elif name == "date":
+ date_str = value
+
+ # Format to markdown using the full message data
+ markdown_content = composio_connector.format_gmail_message_to_markdown(
+ message
+ )
+
+ # Check for empty content (defensive parsing per Composio best practices)
+ if not markdown_content.strip():
+ logger.warning(f"Skipping Gmail message with no content: {subject}")
+ documents_skipped += 1
+ continue
+
+ # Generate unique identifier
+ document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"])
+ unique_identifier_hash = generate_unique_identifier_hash(
+ document_type, f"gmail_{message_id}", search_space_id
+ )
+
+ content_hash = generate_content_hash(markdown_content, search_space_id)
+
+ existing_document = await check_document_by_unique_identifier(
+ session, unique_identifier_hash
+ )
+
+ # Get label IDs from Composio response
+ label_ids = message.get("labelIds", [])
+ # Extract thread_id if available (for consistency with non-Composio implementation)
+ thread_id = message.get("threadId", "") or message.get("thread_id", "")
+
+ if existing_document:
+ if existing_document.content_hash == content_hash:
+ documents_skipped += 1
+ continue
+
+ # Update existing
+ user_llm = await get_user_long_context_llm(
+ session, user_id, search_space_id
+ )
+
+ if user_llm:
+ document_metadata = {
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "document_type": "Gmail Message (Composio)",
+ }
+ (
+ summary_content,
+ summary_embedding,
+ ) = await generate_document_summary(
+ markdown_content, user_llm, document_metadata
+ )
+ else:
+ summary_content = (
+ f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
+ )
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
+
+ chunks = await create_document_chunks(markdown_content)
+
+ existing_document.title = f"Gmail: {subject}"
+ existing_document.content = summary_content
+ existing_document.content_hash = content_hash
+ existing_document.embedding = summary_embedding
+ existing_document.document_metadata = {
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "date": date_str,
+ "labels": label_ids,
+ "connector_id": connector_id,
+ "source": "composio",
+ }
+ existing_document.chunks = chunks
+ existing_document.updated_at = get_current_timestamp()
+
+ documents_indexed += 1
+
+ # Batch commit every 10 documents
+ current_total = total_documents_indexed + documents_indexed
+ if current_total % 10 == 0:
+ logger.info(
+ f"Committing batch: {current_total} Gmail messages processed so far"
+ )
+ await session.commit()
+ continue
+
+ # Create new document
+ user_llm = await get_user_long_context_llm(
+ session, user_id, search_space_id
+ )
+
+ if user_llm:
+ document_metadata = {
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "document_type": "Gmail Message (Composio)",
+ }
+ summary_content, summary_embedding = await generate_document_summary(
+ markdown_content, user_llm, document_metadata
+ )
+ else:
+ summary_content = (
+ f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
+ )
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
+
+ chunks = await create_document_chunks(markdown_content)
+
+ document = Document(
+ search_space_id=search_space_id,
+ title=f"Gmail: {subject}",
+ document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
+ document_metadata={
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "date": date_str,
+ "labels": label_ids,
+ "connector_id": connector_id,
+ "toolkit_id": "gmail",
+ "source": "composio",
+ },
+ content=summary_content,
+ content_hash=content_hash,
+ unique_identifier_hash=unique_identifier_hash,
+ embedding=summary_embedding,
+ chunks=chunks,
+ updated_at=get_current_timestamp(),
+ )
+ session.add(document)
+ documents_indexed += 1
+
+ # Batch commit every 10 documents
+ current_total = total_documents_indexed + documents_indexed
+ if current_total % 10 == 0:
+ logger.info(
+ f"Committing batch: {current_total} Gmail messages processed so far"
+ )
+ await session.commit()
+
+ except Exception as e:
+ logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
+ documents_skipped += 1
+ # Rollback on error to avoid partial state (per Composio best practices)
+ try:
+ await session.rollback()
+ except Exception as rollback_error:
+ logger.error(
+ f"Error during rollback: {rollback_error!s}", exc_info=True
+ )
+ continue
+
+ return documents_indexed, documents_skipped
+
+
+async def index_composio_gmail(
+    session: AsyncSession,
+    connector,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    start_date: str | None,
+    end_date: str | None,
+    task_logger: TaskLoggingService,
+    log_entry,
+    update_last_indexed: bool = True,
+    max_items: int = 1000,
+) -> tuple[int, str | None]:
+    """Index Gmail messages via Composio with pagination and incremental processing.
+
+    Args:
+        session: Async DB session used for lookups, writes and batch commits.
+        connector: Connector ORM object; its last_indexed_at may be updated.
+        connector_id: ID of the Composio Gmail connector being indexed.
+        search_space_id: Search space the created documents belong to.
+        user_id: Owner user ID, used to resolve the user's long-context LLM.
+        start_date: Optional start date (YYYY-MM-DD); "undefined"/"" from the
+            frontend are normalized to None below.
+        end_date: Optional end date, normalized the same way.
+        task_logger: Task logging service for progress/success/failure records.
+        log_entry: Log row handed through to task_logger calls.
+        update_last_indexed: When True, bump connector.last_indexed_at.
+        max_items: Hard cap on messages fetched across all pages.
+
+    Returns:
+        Tuple of (documents indexed, error message or None on success).
+    """
+    try:
+        composio_connector = ComposioGmailConnector(session, connector_id)
+
+        # Normalize date values - handle "undefined" strings from frontend
+        if start_date == "undefined" or start_date == "":
+            start_date = None
+        if end_date == "undefined" or end_date == "":
+            end_date = None
+
+        # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
+        # This ensures user-selected dates are respected (matching non-Composio Gmail connector behavior)
+        if start_date is not None and end_date is not None:
+            # User provided both dates - use them directly
+            start_date_str = start_date
+            end_date_str = end_date
+        else:
+            # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+            # This ensures indexing works even when user doesn't specify dates
+            start_date_str, end_date_str = calculate_date_range(
+                connector, start_date, end_date, default_days_back=365
+            )
+
+        # Build query with date range
+        # Gmail search syntax uses slash-separated dates, e.g. after:2024/01/31.
+        query_parts = []
+        if start_date_str:
+            query_parts.append(f"after:{start_date_str.replace('-', '/')}")
+        if end_date_str:
+            query_parts.append(f"before:{end_date_str.replace('-', '/')}")
+        query = " ".join(query_parts) if query_parts else ""
+
+        logger.info(
+            f"Gmail query for connector {connector_id}: '{query}' "
+            f"(start_date={start_date_str}, end_date={end_date_str})"
+        )
+
+        # Use smaller batch size to avoid 413 payload too large errors
+        batch_size = 50
+        page_token = None
+        total_documents_indexed = 0
+        total_documents_skipped = 0
+        total_messages_fetched = 0
+        result_size_estimate = None  # Will be set from first API response
+
+        while total_messages_fetched < max_items:
+            # Calculate how many messages to fetch in this batch
+            remaining = max_items - total_messages_fetched
+            current_batch_size = min(batch_size, remaining)
+
+            # Use result_size_estimate if available, otherwise fall back to max_items
+            estimated_total = (
+                result_size_estimate if result_size_estimate is not None else max_items
+            )
+            # Cap estimated_total at max_items to avoid showing misleading progress
+            estimated_total = min(estimated_total, max_items)
+
+            await task_logger.log_task_progress(
+                log_entry,
+                f"Fetching Gmail messages batch via Composio for connector {connector_id} "
+                f"({total_messages_fetched}/{estimated_total} fetched, {total_documents_indexed} indexed)",
+                {
+                    "stage": "fetching_messages",
+                    "batch_size": current_batch_size,
+                    "total_fetched": total_messages_fetched,
+                    "total_indexed": total_documents_indexed,
+                    "estimated_total": estimated_total,
+                },
+            )
+
+            # Fetch batch of messages
+            (
+                messages,
+                next_token,
+                result_size_estimate_batch,
+                error,
+            ) = await composio_connector.list_gmail_messages(
+                query=query,
+                max_results=current_batch_size,
+                page_token=page_token,
+            )
+
+            if error:
+                await task_logger.log_task_failure(
+                    log_entry, f"Failed to fetch Gmail messages: {error}", {}
+                )
+                return 0, f"Failed to fetch Gmail messages: {error}"
+
+            if not messages:
+                # No more messages available
+                break
+
+            # Update result_size_estimate from first response (Gmail provides this estimate)
+            if result_size_estimate is None and result_size_estimate_batch is not None:
+                result_size_estimate = result_size_estimate_batch
+                logger.info(
+                    f"Gmail API estimated {result_size_estimate} total messages for query: '{query}'"
+                )
+
+            total_messages_fetched += len(messages)
+            # Recalculate estimated_total after potentially updating result_size_estimate
+            estimated_total = (
+                result_size_estimate if result_size_estimate is not None else max_items
+            )
+            estimated_total = min(estimated_total, max_items)
+
+            logger.info(
+                f"Fetched batch of {len(messages)} Gmail messages "
+                f"(total: {total_messages_fetched}/{estimated_total})"
+            )
+
+            # Process batch incrementally
+            batch_indexed, batch_skipped = await _process_gmail_message_batch(
+                session=session,
+                messages=messages,
+                composio_connector=composio_connector,
+                connector_id=connector_id,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                total_documents_indexed=total_documents_indexed,
+            )
+
+            total_documents_indexed += batch_indexed
+            total_documents_skipped += batch_skipped
+
+            logger.info(
+                f"Processed batch: {batch_indexed} indexed, {batch_skipped} skipped "
+                f"(total: {total_documents_indexed} indexed, {total_documents_skipped} skipped)"
+            )
+
+            # Batch commits happen in _process_gmail_message_batch every 10 documents
+            # This ensures progress is saved incrementally, preventing data loss on crashes
+
+            # Check if we should continue
+            if not next_token:
+                # No more pages available
+                break
+
+            if len(messages) < current_batch_size:
+                # Last page had fewer items than requested, we're done
+                break
+
+            # Continue with next page
+            page_token = next_token
+
+        if total_messages_fetched == 0:
+            success_msg = "No Gmail messages found in the specified date range"
+            await task_logger.log_task_success(
+                log_entry, success_msg, {"messages_count": 0}
+            )
+            # CRITICAL: Update timestamp even when no messages found so Electric SQL syncs and UI shows indexed status
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
+            return 0, None  # Return None (not error) when no items found
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)
+
+        # Final commit to ensure all documents are persisted (safety net)
+        # This matches the pattern used in non-Composio Gmail indexer
+        logger.info(
+            f"Final commit: Total {total_documents_indexed} Gmail messages processed"
+        )
+        await session.commit()
+        logger.info(
+            "Successfully committed all Composio Gmail document changes to database"
+        )
+
+        await task_logger.log_task_success(
+            log_entry,
+            f"Successfully completed Gmail indexing via Composio for connector {connector_id}",
+            {
+                "documents_indexed": total_documents_indexed,
+                "documents_skipped": total_documents_skipped,
+                "messages_fetched": total_messages_fetched,
+            },
+        )
+
+        return total_documents_indexed, None
+
+    except Exception as e:
+        logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
+        return 0, f"Failed to index Gmail via Composio: {e!s}"
+
diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
new file mode 100644
index 000000000..ab8bde53c
--- /dev/null
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@@ -0,0 +1,453 @@
+"""
+Composio Google Calendar Connector Module.
+
+Provides Google Calendar specific methods for data retrieval and indexing via Composio.
+"""
+
+import logging
+from datetime import UTC, datetime
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
+from sqlalchemy.orm import selectinload
+
+from app.config import config
+from app.connectors.composio_connector import ComposioConnector
+from app.db import Document, DocumentType
+from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
+from app.services.llm_service import get_user_long_context_llm
+from app.services.task_logging_service import TaskLoggingService
+from app.tasks.connector_indexers.base import calculate_date_range
+from app.utils.document_converters import (
+ create_document_chunks,
+ generate_content_hash,
+ generate_document_summary,
+ generate_unique_identifier_hash,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def get_current_timestamp() -> datetime:
+    """Get the current timestamp with timezone for updated_at field."""
+    # Timezone-aware UTC, matching the timestamptz column type used by Document.
+    return datetime.now(UTC)
+
+
+async def check_document_by_unique_identifier(
+    session: AsyncSession, unique_identifier_hash: str
+) -> Document | None:
+    """Check if a document with the given unique identifier hash already exists.
+
+    Chunks are eagerly loaded (selectinload) so callers can replace
+    ``document.chunks`` without triggering a lazy load on the async session.
+
+    Returns:
+        The matching Document, or None when no document has this hash.
+    """
+    existing_doc_result = await session.execute(
+        select(Document)
+        .options(selectinload(Document.chunks))
+        .where(Document.unique_identifier_hash == unique_identifier_hash)
+    )
+    return existing_doc_result.scalars().first()
+
+
+async def update_connector_last_indexed(
+    session: AsyncSession,
+    connector,
+    update_last_indexed: bool = True,
+) -> None:
+    """Update the last_indexed_at timestamp for a connector.
+
+    Only mutates the in-memory ORM object; committing is the caller's
+    responsibility. No-op when update_last_indexed is False.
+    NOTE(review): ``session`` is accepted for interface symmetry but unused here.
+    """
+    if update_last_indexed:
+        connector.last_indexed_at = datetime.now(UTC)
+        logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
+
+
+class ComposioGoogleCalendarConnector(ComposioConnector):
+    """
+    Google Calendar specific Composio connector.
+
+    Provides methods for listing calendar events and formatting them from
+    Google Calendar via Composio. API access is delegated to the shared
+    Composio service layer; errors are returned, not raised.
+    """
+
+    async def list_calendar_events(
+        self,
+        time_min: str | None = None,
+        time_max: str | None = None,
+        max_results: int = 250,
+    ) -> tuple[list[dict[str, Any]], str | None]:
+        """
+        List Google Calendar events via Composio.
+
+        Args:
+            time_min: Start time (RFC3339 format).
+            time_max: End time (RFC3339 format).
+            max_results: Maximum number of events.
+
+        Returns:
+            Tuple of (events list, error message).
+        """
+        # Bail out early when the connector has no linked Composio account.
+        connected_account_id = await self.get_connected_account_id()
+        if not connected_account_id:
+            return [], "No connected account ID found"
+
+        # Delegate the actual API call to the shared Composio service.
+        entity_id = await self.get_entity_id()
+        service = await self._get_service()
+        return await service.get_calendar_events(
+            connected_account_id=connected_account_id,
+            entity_id=entity_id,
+            time_min=time_min,
+            time_max=time_max,
+            max_results=max_results,
+        )
+
+    def format_calendar_event_to_markdown(self, event: dict[str, Any]) -> str:
+        """
+        Format a Google Calendar event to markdown.
+
+        Args:
+            event: Event object from Google Calendar API.
+
+        Returns:
+            Formatted markdown string. On failure an error string is returned
+            instead of raising, so callers never see an exception from here.
+        """
+        try:
+            # Extract basic event information
+            summary = event.get("summary", "No Title")
+            description = event.get("description", "")
+            location = event.get("location", "")
+
+            # Extract start and end times
+            start = event.get("start", {})
+            end = event.get("end", {})
+
+            # Timed events carry "dateTime", all-day events carry "date"
+            # (per the Google Calendar API event schema).
+            start_time = start.get("dateTime") or start.get("date", "")
+            end_time = end.get("dateTime") or end.get("date", "")
+
+            # Format times for display
+            def format_time(time_str: str) -> str:
+                # Best effort: fall back to the raw string on parse failure.
+                if not time_str:
+                    return "Unknown"
+                try:
+                    if "T" in time_str:
+                        dt = datetime.fromisoformat(time_str.replace("Z", "+00:00"))
+                        return dt.strftime("%Y-%m-%d %H:%M")
+                    return time_str
+                except Exception:
+                    return time_str
+
+            start_formatted = format_time(start_time)
+            end_formatted = format_time(end_time)
+
+            # Extract attendees
+            attendees = event.get("attendees", [])
+            attendee_list = []
+            for attendee in attendees:
+                email = attendee.get("email", "")
+                display_name = attendee.get("displayName", email)
+                response_status = attendee.get("responseStatus", "")
+                attendee_list.append(f"- {display_name} ({response_status})")
+
+            # Build markdown content
+            markdown_content = f"# {summary}\n\n"
+            markdown_content += f"**Start:** {start_formatted}\n"
+            markdown_content += f"**End:** {end_formatted}\n"
+
+            if location:
+                markdown_content += f"**Location:** {location}\n"
+
+            markdown_content += "\n"
+
+            if description:
+                markdown_content += f"## Description\n\n{description}\n\n"
+
+            if attendee_list:
+                markdown_content += "## Attendees\n\n"
+                markdown_content += "\n".join(attendee_list)
+                markdown_content += "\n\n"
+
+            # Add event metadata
+            markdown_content += "## Event Details\n\n"
+            markdown_content += f"- **Event ID:** {event.get('id', 'Unknown')}\n"
+            markdown_content += f"- **Created:** {event.get('created', 'Unknown')}\n"
+            markdown_content += f"- **Updated:** {event.get('updated', 'Unknown')}\n"
+
+            return markdown_content
+
+        except Exception as e:
+            return f"Error formatting event to markdown: {e!s}"
+
+
+# ============ Indexer Functions ============
+
+
+async def index_composio_google_calendar(
+    session: AsyncSession,
+    connector,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    start_date: str | None,
+    end_date: str | None,
+    task_logger: TaskLoggingService,
+    log_entry,
+    update_last_indexed: bool = True,
+    max_items: int = 2500,
+) -> tuple[int, str | None]:
+    """Index Google Calendar events via Composio.
+
+    Args:
+        session: Async DB session used for lookups, writes and batch commits.
+        connector: Connector ORM object; its last_indexed_at may be updated.
+        connector_id: ID of the Composio Google Calendar connector.
+        search_space_id: Search space the created documents belong to.
+        user_id: Owner user ID, used to resolve the user's long-context LLM.
+        start_date: Optional start date (YYYY-MM-DD); "undefined"/"" from the
+            frontend are normalized to None below.
+        end_date: Optional end date, normalized the same way.
+        task_logger: Task logging service for progress/success/failure records.
+        log_entry: Log row handed through to task_logger calls.
+        update_last_indexed: When True, bump connector.last_indexed_at.
+        max_items: Maximum number of events requested in the single API call.
+
+    Returns:
+        Tuple of (documents indexed, error message or None on success).
+    """
+    try:
+        composio_connector = ComposioGoogleCalendarConnector(session, connector_id)
+
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Fetching Google Calendar events via Composio for connector {connector_id}",
+            {"stage": "fetching_events"},
+        )
+
+        # Normalize date values - handle "undefined" strings from frontend
+        if start_date == "undefined" or start_date == "":
+            start_date = None
+        if end_date == "undefined" or end_date == "":
+            end_date = None
+
+        # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
+        # This ensures user-selected dates are respected (matching non-Composio Calendar connector behavior)
+        if start_date is not None and end_date is not None:
+            # User provided both dates - use them directly
+            start_date_str = start_date
+            end_date_str = end_date
+        else:
+            # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+            # This ensures indexing works even when user doesn't specify dates
+            start_date_str, end_date_str = calculate_date_range(
+                connector, start_date, end_date, default_days_back=365
+            )
+
+        # Build time range for API call
+        # NOTE(review): assumes calculate_date_range returns non-None strings;
+        # a None here would embed a literal "None" in the RFC3339 bound - confirm.
+        time_min = f"{start_date_str}T00:00:00Z"
+        time_max = f"{end_date_str}T23:59:59Z"
+
+        logger.info(
+            f"Google Calendar query for connector {connector_id}: "
+            f"(start_date={start_date_str}, end_date={end_date_str})"
+        )
+
+        events, error = await composio_connector.list_calendar_events(
+            time_min=time_min,
+            time_max=time_max,
+            max_results=max_items,
+        )
+
+        if error:
+            await task_logger.log_task_failure(
+                log_entry, f"Failed to fetch Calendar events: {error}", {}
+            )
+            return 0, f"Failed to fetch Calendar events: {error}"
+
+        if not events:
+            success_msg = "No Google Calendar events found in the specified date range"
+            await task_logger.log_task_success(
+                log_entry, success_msg, {"events_count": 0}
+            )
+            # CRITICAL: Update timestamp even when no events found so Electric SQL syncs and UI shows indexed status
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
+            return (
+                0,
+                None,
+            )  # Return None (not error) when no items found - this is success with 0 items
+
+        logger.info(f"Found {len(events)} Google Calendar events to index via Composio")
+
+        documents_indexed = 0
+        documents_skipped = 0
+
+        for event in events:
+            try:
+                # Handle both standard Google API and potential Composio variations
+                event_id = event.get("id", "") or event.get("eventId", "")
+                summary = (
+                    event.get("summary", "") or event.get("title", "") or "No Title"
+                )
+
+                if not event_id:
+                    documents_skipped += 1
+                    continue
+
+                # Format to markdown
+                markdown_content = composio_connector.format_calendar_event_to_markdown(
+                    event
+                )
+
+                # Generate unique identifier
+                document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"])
+                unique_identifier_hash = generate_unique_identifier_hash(
+                    document_type, f"calendar_{event_id}", search_space_id
+                )
+
+                content_hash = generate_content_hash(markdown_content, search_space_id)
+
+                existing_document = await check_document_by_unique_identifier(
+                    session, unique_identifier_hash
+                )
+
+                # Extract event times
+                start = event.get("start", {})
+                end = event.get("end", {})
+                start_time = start.get("dateTime") or start.get("date", "")
+                end_time = end.get("dateTime") or end.get("date", "")
+                location = event.get("location", "")
+
+                if existing_document:
+                    # Unchanged content: skip without touching the row.
+                    if existing_document.content_hash == content_hash:
+                        documents_skipped += 1
+                        continue
+
+                    # Update existing
+                    user_llm = await get_user_long_context_llm(
+                        session, user_id, search_space_id
+                    )
+
+                    if user_llm:
+                        document_metadata = {
+                            "event_id": event_id,
+                            "summary": summary,
+                            "start_time": start_time,
+                            "document_type": "Google Calendar Event (Composio)",
+                        }
+                        (
+                            summary_content,
+                            summary_embedding,
+                        ) = await generate_document_summary(
+                            markdown_content, user_llm, document_metadata
+                        )
+                    else:
+                        # No user LLM configured: fall back to a plain-text summary.
+                        summary_content = f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
+                        if location:
+                            summary_content += f"\nLocation: {location}"
+                        summary_embedding = config.embedding_model_instance.embed(
+                            summary_content
+                        )
+
+                    chunks = await create_document_chunks(markdown_content)
+
+                    existing_document.title = f"Calendar: {summary}"
+                    existing_document.content = summary_content
+                    existing_document.content_hash = content_hash
+                    existing_document.embedding = summary_embedding
+                    existing_document.document_metadata = {
+                        "event_id": event_id,
+                        "summary": summary,
+                        "start_time": start_time,
+                        "end_time": end_time,
+                        "location": location,
+                        "connector_id": connector_id,
+                        "source": "composio",
+                    }
+                    existing_document.chunks = chunks
+                    existing_document.updated_at = get_current_timestamp()
+
+                    documents_indexed += 1
+
+                    # Batch commit every 10 documents
+                    if documents_indexed % 10 == 0:
+                        logger.info(
+                            f"Committing batch: {documents_indexed} Google Calendar events processed so far"
+                        )
+                        await session.commit()
+                    continue
+
+                # Create new document
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata = {
+                        "event_id": event_id,
+                        "summary": summary,
+                        "start_time": start_time,
+                        "document_type": "Google Calendar Event (Composio)",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        markdown_content, user_llm, document_metadata
+                    )
+                else:
+                    summary_content = (
+                        f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
+                    )
+                    if location:
+                        summary_content += f"\nLocation: {location}"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(markdown_content)
+
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=f"Calendar: {summary}",
+                    document_type=DocumentType(
+                        TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"]
+                    ),
+                    document_metadata={
+                        "event_id": event_id,
+                        "summary": summary,
+                        "start_time": start_time,
+                        "end_time": end_time,
+                        "location": location,
+                        "connector_id": connector_id,
+                        "toolkit_id": "googlecalendar",
+                        "source": "composio",
+                    },
+                    content=summary_content,
+                    content_hash=content_hash,
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=summary_embedding,
+                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
+                )
+                session.add(document)
+                documents_indexed += 1
+
+                # Batch commit every 10 documents
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} Google Calendar events processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
+                documents_skipped += 1
+                # NOTE(review): unlike the Gmail batch processor, there is no
+                # session.rollback() here; a failed flush could leave the
+                # session unusable for later events - confirm this is intentional.
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)
+
+        # Final commit to ensure all documents are persisted (safety net)
+        # This matches the pattern used in non-Composio Gmail indexer
+        logger.info(
+            f"Final commit: Total {documents_indexed} Google Calendar events processed"
+        )
+        await session.commit()
+        logger.info(
+            "Successfully committed all Composio Google Calendar document changes to database"
+        )
+
+        await task_logger.log_task_success(
+            log_entry,
+            f"Successfully completed Google Calendar indexing via Composio for connector {connector_id}",
+            {
+                "documents_indexed": documents_indexed,
+                "documents_skipped": documents_skipped,
+            },
+        )
+
+        return documents_indexed, None
+
+    except Exception as e:
+        logger.error(
+            f"Failed to index Google Calendar via Composio: {e!s}", exc_info=True
+        )
+        return 0, f"Failed to index Google Calendar via Composio: {e!s}"
+
diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py
new file mode 100644
index 000000000..e19436611
--- /dev/null
+++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py
@@ -0,0 +1,1162 @@
+"""
+Composio Google Drive Connector Module.
+
+Provides Google Drive specific methods for data retrieval and indexing via Composio.
+"""
+
+import logging
+import os
+import tempfile
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm.attributes import flag_modified
+
+from app.config import config
+from app.connectors.composio_connector import ComposioConnector
+from app.db import Document, DocumentType, Log
+from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
+from app.services.llm_service import get_user_long_context_llm
+from app.services.task_logging_service import TaskLoggingService
+from app.utils.document_converters import (
+ create_document_chunks,
+ generate_content_hash,
+ generate_document_summary,
+ generate_unique_identifier_hash,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# Binary file extensions that need file processor
+# (routed through the configured ETL service by _process_file_content)
+BINARY_FILE_EXTENSIONS = {
+    ".pdf",
+    ".doc",
+    ".docx",
+    ".xls",
+    ".xlsx",
+    ".ppt",
+    ".pptx",
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".bmp",
+    ".tiff",
+    ".webp",
+    ".zip",
+    ".tar",
+    ".gz",
+    ".rar",
+    ".7z",
+    ".mp3",
+    ".mp4",
+    ".wav",
+    ".avi",
+    ".mov",
+    ".exe",
+    ".dll",
+    ".so",
+    ".bin",
+}
+
+# Text file extensions that can be decoded as UTF-8
+# (extension sets take precedence over MIME type in _is_binary_file)
+TEXT_FILE_EXTENSIONS = {
+    ".txt",
+    ".md",
+    ".markdown",
+    ".json",
+    ".xml",
+    ".html",
+    ".htm",
+    ".css",
+    ".js",
+    ".ts",
+    ".py",
+    ".java",
+    ".c",
+    ".cpp",
+    ".h",
+    ".yaml",
+    ".yml",
+    ".toml",
+    ".ini",
+    ".cfg",
+    ".conf",
+    ".sh",
+    ".bash",
+    ".zsh",
+    ".fish",
+    ".sql",
+    ".csv",
+    ".tsv",
+    ".rst",
+    ".tex",
+    ".log",
+}
+
+
+def get_current_timestamp() -> datetime:
+    """Get the current timestamp with timezone for updated_at field."""
+    # Timezone-aware UTC, matching the timestamptz column type used by Document.
+    return datetime.now(UTC)
+
+
+def _is_binary_file(file_name: str, mime_type: str) -> bool:
+    """Check if a file is binary based on extension or mime type.
+
+    The extension sets are authoritative; the MIME type is only consulted for
+    extensions in neither set, and unknown types default to text.
+
+    Args:
+        file_name: File name whose suffix is checked (case-insensitive).
+        mime_type: MIME type reported for the file; may be empty.
+
+    Returns:
+        True when the file should go through the binary/ETL pipeline.
+    """
+    extension = Path(file_name).suffix.lower()
+
+    # Check extension first
+    if extension in BINARY_FILE_EXTENSIONS:
+        return True
+    if extension in TEXT_FILE_EXTENSIONS:
+        return False
+
+    # Check mime type
+    if mime_type:
+        if mime_type.startswith(("image/", "audio/", "video/", "application/pdf")):
+            return True
+        if mime_type.startswith(("text/", "application/json", "application/xml")):
+            return False
+        # Office documents
+        if (
+            "spreadsheet" in mime_type
+            or "document" in mime_type
+            or "presentation" in mime_type
+        ):
+            return True
+
+    # Default to text for unknown types
+    return False
+
+
+class ComposioGoogleDriveConnector(ComposioConnector):
+    """
+    Google Drive specific Composio connector.
+
+    Provides methods for listing files, downloading content, and tracking changes
+    from Google Drive via Composio. All methods delegate to the shared Composio
+    service layer and report failures via the trailing error-message element of
+    their returned tuples instead of raising.
+    """
+
+    async def list_drive_files(
+        self,
+        folder_id: str | None = None,
+        page_token: str | None = None,
+        page_size: int = 100,
+    ) -> tuple[list[dict[str, Any]], str | None, str | None]:
+        """
+        List files from Google Drive via Composio.
+
+        Args:
+            folder_id: Optional folder ID to list contents of.
+            page_token: Pagination token.
+            page_size: Number of files per page.
+
+        Returns:
+            Tuple of (files list, next_page_token, error message).
+        """
+        # Bail out early when the connector has no linked Composio account.
+        connected_account_id = await self.get_connected_account_id()
+        if not connected_account_id:
+            return [], None, "No connected account ID found"
+
+        entity_id = await self.get_entity_id()
+        service = await self._get_service()
+        return await service.get_drive_files(
+            connected_account_id=connected_account_id,
+            entity_id=entity_id,
+            folder_id=folder_id,
+            page_token=page_token,
+            page_size=page_size,
+        )
+
+    async def get_drive_file_content(
+        self, file_id: str
+    ) -> tuple[bytes | None, str | None]:
+        """
+        Download file content from Google Drive via Composio.
+
+        Args:
+            file_id: Google Drive file ID.
+
+        Returns:
+            Tuple of (file content bytes, error message).
+        """
+        connected_account_id = await self.get_connected_account_id()
+        if not connected_account_id:
+            return None, "No connected account ID found"
+
+        entity_id = await self.get_entity_id()
+        service = await self._get_service()
+        return await service.get_drive_file_content(
+            connected_account_id=connected_account_id,
+            entity_id=entity_id,
+            file_id=file_id,
+        )
+
+    async def get_drive_start_page_token(self) -> tuple[str | None, str | None]:
+        """
+        Get the starting page token for Google Drive change tracking.
+
+        Returns:
+            Tuple of (start_page_token, error message).
+        """
+        connected_account_id = await self.get_connected_account_id()
+        if not connected_account_id:
+            return None, "No connected account ID found"
+
+        entity_id = await self.get_entity_id()
+        service = await self._get_service()
+        return await service.get_drive_start_page_token(
+            connected_account_id=connected_account_id,
+            entity_id=entity_id,
+        )
+
+    async def list_drive_changes(
+        self,
+        page_token: str | None = None,
+        page_size: int = 100,
+        include_removed: bool = True,
+    ) -> tuple[list[dict[str, Any]], str | None, str | None]:
+        """
+        List changes in Google Drive since the given page token.
+
+        Args:
+            page_token: Page token from previous sync (optional).
+            page_size: Number of changes per page.
+            include_removed: Whether to include removed items.
+
+        Returns:
+            Tuple of (changes list, new_start_page_token, error message).
+        """
+        connected_account_id = await self.get_connected_account_id()
+        if not connected_account_id:
+            return [], None, "No connected account ID found"
+
+        entity_id = await self.get_entity_id()
+        service = await self._get_service()
+        return await service.list_drive_changes(
+            connected_account_id=connected_account_id,
+            entity_id=entity_id,
+            page_token=page_token,
+            page_size=page_size,
+            include_removed=include_removed,
+        )
+
+
+# ============ File Processing Utilities ============
+
+
+async def _process_file_content(
+    content: bytes | str,
+    file_name: str,
+    file_id: str,
+    mime_type: str,
+    search_space_id: int,
+    user_id: str,
+    session: AsyncSession,
+    task_logger: TaskLoggingService,
+    log_entry: Log,
+    processing_errors: list[str],
+) -> str:
+    """
+    Process file content and return markdown text.
+
+    For binary files (PDFs, images, etc.), uses Surfsense's ETL service.
+    For text files, decodes as UTF-8.
+
+    Never raises: extraction/decoding failures are appended to
+    processing_errors and a placeholder markdown document is returned.
+
+    Args:
+        content: File content as bytes or string
+        file_name: Name of the file
+        file_id: Google Drive file ID
+        mime_type: MIME type of the file
+        search_space_id: Search space ID
+        user_id: User ID
+        session: Database session
+        task_logger: Task logging service
+        log_entry: Log entry for tracking
+        processing_errors: List to append errors to
+
+    Returns:
+        Markdown content string
+    """
+    # NOTE(review): search_space_id, user_id and session are accepted but unused here.
+    # Ensure content is bytes
+    if isinstance(content, str):
+        content = content.encode("utf-8")
+
+    # Check if this is a binary file
+    if _is_binary_file(file_name, mime_type):
+        # Use ETL service for binary files (PDF, Office docs, etc.)
+        temp_file_path = None
+        try:
+            # Get file extension
+            extension = Path(file_name).suffix or ".bin"
+
+            # Write to temp file
+            # delete=False so the ETL service can reopen the file by path;
+            # cleanup is handled in the finally block below.
+            with tempfile.NamedTemporaryFile(
+                delete=False, suffix=extension
+            ) as tmp_file:
+                tmp_file.write(content)
+                temp_file_path = tmp_file.name
+
+            # Use the configured ETL service to extract text
+            extracted_text = await _extract_text_with_etl(
+                temp_file_path, file_name, task_logger, log_entry
+            )
+
+            if extracted_text:
+                return extracted_text
+            else:
+                # Fallback if extraction fails
+                logger.warning(f"Could not extract text from binary file {file_name}")
+                return f"# {file_name}\n\n[Binary file - text extraction failed]\n\n**File ID:** {file_id}\n**Type:** {mime_type}\n"
+
+        except Exception as e:
+            error_msg = f"Error processing binary file {file_name}: {e!s}"
+            logger.error(error_msg)
+            processing_errors.append(error_msg)
+            return f"# {file_name}\n\n[Binary file - processing error]\n\n**File ID:** {file_id}\n**Type:** {mime_type}\n"
+        finally:
+            # Cleanup temp file
+            if temp_file_path and os.path.exists(temp_file_path):
+                try:
+                    os.unlink(temp_file_path)
+                except Exception as e:
+                    logger.debug(f"Could not delete temp file {temp_file_path}: {e}")
+    else:
+        # Text file - try to decode as UTF-8
+        try:
+            return content.decode("utf-8")
+        except UnicodeDecodeError:
+            # Try other encodings
+            # latin-1 maps every byte value, so in practice the first
+            # candidate always succeeds and the final fallback is unreachable.
+            for encoding in ["latin-1", "cp1252", "iso-8859-1"]:
+                try:
+                    return content.decode(encoding)
+                except UnicodeDecodeError:
+                    continue
+
+            # If all encodings fail, treat as binary
+            error_msg = f"Could not decode text file {file_name} with any encoding"
+            logger.warning(error_msg)
+            processing_errors.append(error_msg)
+            return f"# {file_name}\n\n[File content could not be decoded]\n\n**File ID:** {file_id}\n**Type:** {mime_type}\n"
+
+
async def _extract_text_with_etl(
    file_path: str,
    file_name: str,
    task_logger: TaskLoggingService,
    log_entry: Log,
) -> str | None:
    """
    Extract text from a file using the configured ETL service.

    Dispatches on ``config.ETL_SERVICE``: UNSTRUCTURED, LLAMACLOUD, or
    DOCLING. Any other value logs a warning and returns None.

    Args:
        file_path: Path to the file on local disk
        file_name: Name of the file (used for logging and Docling)
        task_logger: Task logging service (only used by the LLAMACLOUD path)
        log_entry: Log entry for tracking (only used by the LLAMACLOUD path)

    Returns:
        Extracted text as markdown, or None if extraction fails
    """
    # Local imports: only needed when extraction is actually requested.
    import warnings
    from logging import ERROR, getLogger

    etl_service = config.ETL_SERVICE

    try:
        if etl_service == "UNSTRUCTURED":
            # Third-party/ETL-specific imports are kept inside the branch so
            # unused ETL backends never have to be installed.
            from langchain_unstructured import UnstructuredLoader

            from app.utils.document_converters import convert_document_to_markdown

            loader = UnstructuredLoader(
                file_path,
                mode="elements",
                post_processors=[],
                languages=["eng"],
                include_orig_elements=False,
                include_metadata=False,
                strategy="auto",
            )

            docs = await loader.aload()
            if docs:
                return await convert_document_to_markdown(docs)
            return None

        elif etl_service == "LLAMACLOUD":
            from app.tasks.document_processors.file_processors import (
                parse_with_llamacloud_retry,
            )

            # Estimate pages (rough heuristic: ~80 KB of file size per page)
            file_size = os.path.getsize(file_path)
            estimated_pages = max(1, file_size // (80 * 1024))

            result = await parse_with_llamacloud_retry(
                file_path=file_path,
                estimated_pages=estimated_pages,
                task_logger=task_logger,
                log_entry=log_entry,
            )

            markdown_documents = await result.aget_markdown_documents(
                split_by_page=False
            )
            if markdown_documents:
                return markdown_documents[0].text
            return None

        elif etl_service == "DOCLING":
            from app.services.docling_service import create_docling_service

            docling_service = create_docling_service()

            # Suppress noisy pdfminer warnings during parsing; the original
            # log level is restored in the finally block below.
            pdfminer_logger = getLogger("pdfminer")
            original_level = pdfminer_logger.level

            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", category=UserWarning, module="pdfminer"
                )
                warnings.filterwarnings(
                    "ignore", message=".*Cannot set gray non-stroke color.*"
                )
                warnings.filterwarnings("ignore", message=".*invalid float value.*")

                pdfminer_logger.setLevel(ERROR)

                try:
                    result = await docling_service.process_document(
                        file_path, file_name
                    )
                finally:
                    pdfminer_logger.setLevel(original_level)

            return result.get("content")
        else:
            logger.warning(f"Unknown ETL service: {etl_service}")
            return None

    except Exception as e:
        # Extraction failures are non-fatal; the caller falls back to a
        # metadata-only document body.
        logger.error(f"ETL extraction failed for {file_name}: {e!s}")
        return None
+
+
+# ============ Indexer Functions ============
+
+
async def check_document_by_unique_identifier(
    session: AsyncSession, unique_identifier_hash: str
) -> Document | None:
    """Look up an existing document by its unique identifier hash.

    Returns the matching ``Document`` (with its chunks eagerly loaded via
    ``selectinload``) or ``None`` when no such document exists.
    """
    # Imported locally to avoid circular-import issues at module load time.
    from sqlalchemy.future import select
    from sqlalchemy.orm import selectinload

    stmt = (
        select(Document)
        .options(selectinload(Document.chunks))
        .where(Document.unique_identifier_hash == unique_identifier_hash)
    )
    result = await session.execute(stmt)
    return result.scalars().first()
+
+
async def update_connector_last_indexed(
    session: AsyncSession,
    connector,
    update_last_indexed: bool = True,
) -> None:
    """Stamp the connector's ``last_indexed_at`` with the current UTC time.

    No-op when ``update_last_indexed`` is False. The ``session`` parameter is
    accepted for interface symmetry with the other indexer helpers; the
    caller is responsible for committing.
    """
    if not update_last_indexed:
        return
    # UTC keeps timestamps consistent regardless of server timezone.
    connector.last_indexed_at = datetime.now(UTC)
    logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
+
+
async def index_composio_google_drive(
    session: AsyncSession,
    connector,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    task_logger: TaskLoggingService,
    log_entry,
    update_last_indexed: bool = True,
    max_items: int = 1000,
) -> tuple[int, str | None]:
    """Index Google Drive files via Composio with delta sync support.

    Delta Sync Flow:
    1. First sync: Full scan + get initial page token
    2. Subsequent syncs: Use LIST_CHANGES to process only changed files

    Supports folder/file selection via connector config:
    - selected_folders: List of {id, name} for folders to index
    - selected_files: List of {id, name} for individual files to index
    - indexing_options: {max_files_per_folder, incremental_sync, include_subfolders}

    Args:
        session: Database session used for reads, writes, and the final commit.
        connector: The SearchSourceConnector ORM row being indexed.
        connector_id: ID of that connector (used to build the Composio client).
        search_space_id: Search space the indexed documents belong to.
        user_id: Owner of the search space.
        task_logger: Task logging service for progress/success/failure entries.
        log_entry: Log entry for tracking.
        update_last_indexed: When True, bump connector.last_indexed_at on completion.
        max_items: Upper bound on files/changes processed per run.

    Returns:
        Tuple of (documents_indexed, error_message). ``error_message`` is
        None on a fully clean run.
    """
    try:
        composio_connector = ComposioGoogleDriveConnector(session, connector_id)
        connector_config = await composio_connector.get_config()

        # Get folder/file selection configuration
        selected_folders = connector_config.get("selected_folders", [])
        selected_files = connector_config.get("selected_files", [])
        indexing_options = connector_config.get("indexing_options", {})

        # Check for stored page token for delta sync; delta sync also
        # requires evidence of at least one prior successful sync.
        stored_page_token = connector_config.get("drive_page_token")
        use_delta_sync = stored_page_token and connector.last_indexed_at

        max_files_per_folder = indexing_options.get("max_files_per_folder", 100)
        include_subfolders = indexing_options.get("include_subfolders", True)

        # Route to delta sync or full scan
        if use_delta_sync:
            logger.info(f"Using delta sync for Composio Google Drive connector {connector_id}")
            await task_logger.log_task_progress(
                log_entry,
                f"Starting delta sync for Google Drive via Composio (connector {connector_id})",
                {"stage": "delta_sync", "token": stored_page_token[:20] + "..."},
            )

            documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_delta_sync(
                session=session,
                composio_connector=composio_connector,
                connector_id=connector_id,
                search_space_id=search_space_id,
                user_id=user_id,
                page_token=stored_page_token,
                max_items=max_items,
                task_logger=task_logger,
                log_entry=log_entry,
            )
        else:
            logger.info(f"Using full scan for Composio Google Drive connector {connector_id} (first sync or no token)")
            await task_logger.log_task_progress(
                log_entry,
                f"Fetching Google Drive files via Composio for connector {connector_id}",
                {
                    "stage": "full_scan",
                    "selected_folders": len(selected_folders),
                    "selected_files": len(selected_files),
                },
            )

            documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_full_scan(
                session=session,
                composio_connector=composio_connector,
                connector_id=connector_id,
                search_space_id=search_space_id,
                user_id=user_id,
                selected_folders=selected_folders,
                selected_files=selected_files,
                max_files_per_folder=max_files_per_folder,
                include_subfolders=include_subfolders,
                max_items=max_items,
                task_logger=task_logger,
                log_entry=log_entry,
            )

        # Get new page token for next sync (always update after successful sync)
        new_token, token_error = await composio_connector.get_drive_start_page_token()
        if new_token and not token_error:
            # Refresh connector to avoid stale state
            await session.refresh(connector)

            if not connector.config:
                connector.config = {}
            connector.config["drive_page_token"] = new_token
            # JSON columns don't auto-detect in-place mutation; mark dirty.
            flag_modified(connector, "config")
            logger.info(f"Updated drive_page_token for connector {connector_id}")
        elif token_error:
            logger.warning(f"Failed to get new page token: {token_error}")

        # CRITICAL: Always update timestamp so Electric SQL syncs and UI shows indexed status
        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit
        logger.info(f"Final commit: Total {documents_indexed} Google Drive files processed")
        await session.commit()
        logger.info("Successfully committed all Composio Google Drive document changes to database")

        # Handle processing errors
        error_message = None
        if processing_errors:
            if len(processing_errors) == 1:
                error_message = processing_errors[0]
            else:
                error_message = f"Failed to process {len(processing_errors)} file(s). First error: {processing_errors[0]}"
            await task_logger.log_task_failure(
                log_entry,
                f"Completed Google Drive indexing with {len(processing_errors)} error(s) for connector {connector_id}",
                {
                    "documents_indexed": documents_indexed,
                    "documents_skipped": documents_skipped,
                    "sync_type": "delta" if use_delta_sync else "full",
                    "errors": processing_errors,
                },
            )
        else:
            await task_logger.log_task_success(
                log_entry,
                f"Successfully completed Google Drive indexing via Composio for connector {connector_id}",
                {
                    "documents_indexed": documents_indexed,
                    "documents_skipped": documents_skipped,
                    "sync_type": "delta" if use_delta_sync else "full",
                },
            )

        return documents_indexed, error_message

    except Exception as e:
        logger.error(f"Failed to index Google Drive via Composio: {e!s}", exc_info=True)
        return 0, f"Failed to index Google Drive via Composio: {e!s}"
+
+
async def _index_composio_drive_delta_sync(
    session: AsyncSession,
    composio_connector: ComposioGoogleDriveConnector,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    page_token: str,
    max_items: int,
    task_logger: TaskLoggingService,
    log_entry,
) -> tuple[int, int, list[str]]:
    """Index Google Drive files using delta sync (only changed files).

    Uses GOOGLEDRIVE_LIST_CHANGES to fetch only files that changed since last sync.
    Handles: new files, modified files, and deleted files.

    Args:
        session: Database session for document reads/writes and batch commits.
        composio_connector: Wrapper used to call the Composio Drive API.
        connector_id: ID of the connector being indexed.
        search_space_id: Search space the documents belong to.
        user_id: Owner of the search space.
        page_token: Drive changes page token saved from the previous sync.
        max_items: Upper bound on the number of changes to process.
        task_logger: Task logging service (passed through to file processing).
        log_entry: Log entry for tracking.

    Returns:
        Tuple of (documents_indexed, documents_skipped, processing_errors).
    """
    documents_indexed = 0
    documents_skipped = 0
    processing_errors = []

    # Fetch all changes with pagination
    all_changes = []
    current_token = page_token

    while len(all_changes) < max_items:
        changes, next_token, error = await composio_connector.list_drive_changes(
            page_token=current_token,
            page_size=100,
            include_removed=True,
        )

        if error:
            logger.error(f"Error fetching Drive changes: {error}")
            processing_errors.append(f"Failed to fetch changes: {error}")
            break

        all_changes.extend(changes)

        # Stop when pagination is exhausted, or when the token stops
        # advancing (guards against looping on an API that echoes the
        # same token back).
        if not next_token or next_token == current_token:
            break
        current_token = next_token

    if not all_changes:
        logger.info("No changes detected since last sync")
        return 0, 0, []

    logger.info(f"Processing {len(all_changes)} changes from delta sync")

    for change in all_changes[:max_items]:
        # Initialized before the try block so the except handler below can
        # always reference file_id, even when parsing the change record
        # itself raises (previously this risked a NameError that masked
        # the real error).
        file_id = ""
        try:
            # Handle removed files
            is_removed = change.get("removed", False)
            file_info = change.get("file", {})
            file_id = change.get("fileId") or file_info.get("id", "")

            if not file_id:
                documents_skipped += 1
                continue

            # Check if file was trashed or removed
            if is_removed or file_info.get("trashed", False):
                # Remove document from database
                document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
                unique_identifier_hash = generate_unique_identifier_hash(
                    document_type, f"drive_{file_id}", search_space_id
                )
                existing_document = await check_document_by_unique_identifier(
                    session, unique_identifier_hash
                )
                if existing_document:
                    await session.delete(existing_document)
                    documents_indexed += 1
                    logger.info(f"Deleted document for removed/trashed file: {file_id}")
                continue

            # Process changed file
            file_name = file_info.get("name", "") or "Untitled"
            mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")

            # Skip folders
            if mime_type == "application/vnd.google-apps.folder":
                continue

            # Process the file
            indexed, skipped, errors = await _process_single_drive_file(
                session=session,
                composio_connector=composio_connector,
                file_id=file_id,
                file_name=file_name,
                mime_type=mime_type,
                connector_id=connector_id,
                search_space_id=search_space_id,
                user_id=user_id,
                task_logger=task_logger,
                log_entry=log_entry,
            )

            documents_indexed += indexed
            documents_skipped += skipped
            processing_errors.extend(errors)

            # Batch commit every 10 documents
            if documents_indexed > 0 and documents_indexed % 10 == 0:
                await session.commit()
                logger.info(f"Committed batch: {documents_indexed} changes processed")

        except Exception as e:
            error_msg = f"Error processing change for file {file_id}: {e!s}"
            logger.error(error_msg, exc_info=True)
            processing_errors.append(error_msg)
            documents_skipped += 1

    logger.info(f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped")
    return documents_indexed, documents_skipped, processing_errors
+
+
async def _index_composio_drive_full_scan(
    session: AsyncSession,
    composio_connector: ComposioGoogleDriveConnector,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    selected_folders: list[dict],
    selected_files: list[dict],
    max_files_per_folder: int,
    include_subfolders: bool,
    max_items: int,
    task_logger: TaskLoggingService,
    log_entry,
) -> tuple[int, int, list[str]]:
    """Index Google Drive files using full scan (first sync or when no delta token).

    When ``selected_folders``/``selected_files`` are provided, only those are
    scanned (optionally recursing into subfolders); otherwise all accessible
    files are fetched up to ``max_items``.

    Returns:
        Tuple of (documents_indexed, documents_skipped, processing_errors).
    """
    documents_indexed = 0
    documents_skipped = 0
    processing_errors = []

    all_files = []

    # If specific folders/files are selected, fetch from those
    if selected_folders or selected_files:
        # Fetch files from selected folders
        for folder in selected_folders:
            folder_id = folder.get("id")
            folder_name = folder.get("name", "Unknown")

            if not folder_id:
                continue

            # Handle special case for "root" folder (API expects no folder filter)
            actual_folder_id = None if folder_id == "root" else folder_id

            logger.info(f"Fetching files from folder: {folder_name} ({folder_id})")

            # Fetch files from this folder
            folder_files = []
            page_token = None

            while len(folder_files) < max_files_per_folder:
                (
                    files,
                    next_token,
                    error,
                ) = await composio_connector.list_drive_files(
                    folder_id=actual_folder_id,
                    page_token=page_token,
                    page_size=min(100, max_files_per_folder - len(folder_files)),
                )

                if error:
                    logger.warning(
                        f"Failed to fetch files from folder {folder_name}: {error}"
                    )
                    break

                # Process files
                for file_info in files:
                    mime_type = file_info.get("mimeType", "") or file_info.get(
                        "mime_type", ""
                    )

                    # If it's a folder and include_subfolders is enabled, recursively fetch
                    if mime_type == "application/vnd.google-apps.folder":
                        if include_subfolders:
                            # Add subfolder files recursively
                            subfolder_files = await _fetch_folder_files_recursively(
                                composio_connector,
                                file_info.get("id"),
                                max_files=max_files_per_folder,
                                current_count=len(folder_files),
                            )
                            folder_files.extend(subfolder_files)
                    else:
                        folder_files.append(file_info)

                if not next_token:
                    break
                page_token = next_token

            all_files.extend(folder_files[:max_files_per_folder])
            logger.info(f"Found {len(folder_files)} files in folder {folder_name}")

        # Add specifically selected files
        for selected_file in selected_files:
            file_id = selected_file.get("id")
            file_name = selected_file.get("name", "Unknown")

            if not file_id:
                continue

            # Add file info (we'll fetch content later during indexing)
            all_files.append(
                {
                    "id": file_id,
                    "name": file_name,
                    "mimeType": "",  # Will be determined later
                }
            )
    else:
        # No selection specified - fetch all files (original behavior)
        page_token = None

        while len(all_files) < max_items:
            files, next_token, error = await composio_connector.list_drive_files(
                page_token=page_token,
                page_size=min(100, max_items - len(all_files)),
            )

            if error:
                return 0, 0, [f"Failed to fetch Drive files: {error}"]

            all_files.extend(files)

            if not next_token:
                break
            page_token = next_token

    if not all_files:
        logger.info("No Google Drive files found")
        return 0, 0, []

    logger.info(f"Found {len(all_files)} Google Drive files to index via Composio (full scan)")

    for file_info in all_files:
        # Initialized before the try block so the except handler can always
        # reference them; previously file_name could be unbound (NameError)
        # on a first-iteration failure, or stale from a prior iteration.
        file_id = ""
        file_name = ""
        try:
            # Handle both standard Google API and potential Composio variations
            file_id = file_info.get("id", "") or file_info.get("fileId", "")
            file_name = (
                file_info.get("name", "")
                or file_info.get("fileName", "")
                or "Untitled"
            )
            mime_type = file_info.get("mimeType", "") or file_info.get(
                "mime_type", ""
            )

            if not file_id:
                documents_skipped += 1
                continue

            # Skip folders
            if mime_type == "application/vnd.google-apps.folder":
                continue

            # Process the file
            indexed, skipped, errors = await _process_single_drive_file(
                session=session,
                composio_connector=composio_connector,
                file_id=file_id,
                file_name=file_name,
                mime_type=mime_type,
                connector_id=connector_id,
                search_space_id=search_space_id,
                user_id=user_id,
                task_logger=task_logger,
                log_entry=log_entry,
            )

            documents_indexed += indexed
            documents_skipped += skipped
            processing_errors.extend(errors)

            # Batch commit every 10 documents
            if documents_indexed > 0 and documents_indexed % 10 == 0:
                logger.info(f"Committing batch: {documents_indexed} Google Drive files processed so far")
                await session.commit()

        except Exception as e:
            error_msg = f"Error processing Drive file {file_name or 'unknown'}: {e!s}"
            logger.error(error_msg, exc_info=True)
            processing_errors.append(error_msg)
            documents_skipped += 1

    logger.info(f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped")
    return documents_indexed, documents_skipped, processing_errors
+
+
async def _summarize_drive_file(
    session: AsyncSession,
    user_id: str,
    search_space_id: int,
    file_id: str,
    file_name: str,
    mime_type: str,
    markdown_content: str,
):
    """Build (summary_content, summary_embedding) for a Drive file.

    Prefers the user's long-context LLM for summarization; when none is
    configured, falls back to a plain metadata summary embedded with the
    default embedding model.
    """
    user_llm = await get_user_long_context_llm(session, user_id, search_space_id)

    if user_llm:
        document_metadata = {
            "file_id": file_id,
            "file_name": file_name,
            "mime_type": mime_type,
            "document_type": "Google Drive File (Composio)",
        }
        (
            summary_content,
            summary_embedding,
        ) = await generate_document_summary(
            markdown_content, user_llm, document_metadata
        )
    else:
        summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
        summary_embedding = config.embedding_model_instance.embed(summary_content)

    return summary_content, summary_embedding


async def _process_single_drive_file(
    session: AsyncSession,
    composio_connector: ComposioGoogleDriveConnector,
    file_id: str,
    file_name: str,
    mime_type: str,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    task_logger: TaskLoggingService,
    log_entry,
) -> tuple[int, int, list[str]]:
    """Process a single Google Drive file for indexing.

    Fetches the file content via Composio, converts it to markdown, then
    either skips (content unchanged), updates the existing document, or
    creates a new one. Summary generation is shared between the update and
    create paths via ``_summarize_drive_file`` (was previously duplicated).

    Returns:
        Tuple of (documents_indexed, documents_skipped, processing_errors)
    """
    processing_errors = []

    # Generate unique identifier hash
    document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
    unique_identifier_hash = generate_unique_identifier_hash(
        document_type, f"drive_{file_id}", search_space_id
    )

    # Check if document exists
    existing_document = await check_document_by_unique_identifier(
        session, unique_identifier_hash
    )

    # Get file content
    content, content_error = await composio_connector.get_drive_file_content(file_id)

    if content_error or not content:
        logger.warning(
            f"Could not get content for file {file_name}: {content_error}"
        )
        # Use metadata as content fallback
        markdown_content = f"# {file_name}\n\n"
        markdown_content += f"**File ID:** {file_id}\n"
        markdown_content += f"**Type:** {mime_type}\n"
    elif isinstance(content, dict):
        # Safety check: if content is still a dict, log error and use fallback
        error_msg = f"Unexpected dict content format for file {file_name}: {list(content.keys())}"
        logger.error(error_msg)
        processing_errors.append(error_msg)
        markdown_content = f"# {file_name}\n\n"
        markdown_content += f"**File ID:** {file_id}\n"
        markdown_content += f"**Type:** {mime_type}\n"
    else:
        # Process content based on file type
        markdown_content = await _process_file_content(
            content=content,
            file_name=file_name,
            file_id=file_id,
            mime_type=mime_type,
            search_space_id=search_space_id,
            user_id=user_id,
            session=session,
            task_logger=task_logger,
            log_entry=log_entry,
            processing_errors=processing_errors,
        )

    content_hash = generate_content_hash(markdown_content, search_space_id)

    if existing_document:
        if existing_document.content_hash == content_hash:
            return 0, 1, processing_errors  # Skipped (content unchanged)

        # Update existing document in place
        summary_content, summary_embedding = await _summarize_drive_file(
            session,
            user_id,
            search_space_id,
            file_id,
            file_name,
            mime_type,
            markdown_content,
        )

        chunks = await create_document_chunks(markdown_content)

        existing_document.title = f"Drive: {file_name}"
        existing_document.content = summary_content
        existing_document.content_hash = content_hash
        existing_document.embedding = summary_embedding
        existing_document.document_metadata = {
            "file_id": file_id,
            "file_name": file_name,
            "FILE_NAME": file_name,  # For compatibility
            "mime_type": mime_type,
            "connector_id": connector_id,
            # Keep metadata shape consistent with newly created documents,
            # which also carry toolkit_id.
            "toolkit_id": "googledrive",
            "source": "composio",
        }
        existing_document.chunks = chunks
        existing_document.updated_at = get_current_timestamp()

        return 1, 0, processing_errors  # Indexed (updated)

    # Create new document
    summary_content, summary_embedding = await _summarize_drive_file(
        session,
        user_id,
        search_space_id,
        file_id,
        file_name,
        mime_type,
        markdown_content,
    )

    chunks = await create_document_chunks(markdown_content)

    document = Document(
        search_space_id=search_space_id,
        title=f"Drive: {file_name}",
        document_type=document_type,
        document_metadata={
            "file_id": file_id,
            "file_name": file_name,
            "FILE_NAME": file_name,  # For compatibility
            "mime_type": mime_type,
            "connector_id": connector_id,
            "toolkit_id": "googledrive",
            "source": "composio",
        },
        content=summary_content,
        content_hash=content_hash,
        unique_identifier_hash=unique_identifier_hash,
        embedding=summary_embedding,
        chunks=chunks,
        updated_at=get_current_timestamp(),
    )
    session.add(document)

    return 1, 0, processing_errors  # Indexed (created)
+
+
async def _fetch_folder_files_recursively(
    composio_connector: ComposioGoogleDriveConnector,
    folder_id: str,
    max_files: int = 100,
    current_count: int = 0,
    depth: int = 0,
    max_depth: int = 10,
) -> list[dict[str, Any]]:
    """
    Recursively fetch files from a Google Drive folder via Composio.

    Descends into subfolders, counting files already gathered by the caller
    (``current_count``) toward the ``max_files`` budget. Recursion is capped
    by ``max_depth`` to avoid runaway traversal of cyclic/huge trees.

    Args:
        composio_connector: The Composio connector instance
        folder_id: Google Drive folder ID
        max_files: Maximum number of files to fetch
        current_count: Current number of files already fetched
        depth: Current recursion depth
        max_depth: Maximum recursion depth to prevent infinite loops

    Returns:
        List of file info dictionaries
    """
    # Guard clauses: depth limit and exhausted budget.
    if depth >= max_depth:
        logger.warning(f"Max recursion depth reached for folder {folder_id}")
        return []
    if current_count >= max_files:
        return []

    collected: list[dict[str, Any]] = []
    token = None

    try:
        while current_count + len(collected) < max_files:
            remaining = max_files - len(collected) - current_count
            files, next_token, error = await composio_connector.list_drive_files(
                folder_id=folder_id,
                page_token=token,
                page_size=min(100, remaining),
            )

            if error:
                logger.warning(
                    f"Error fetching files from subfolder {folder_id}: {error}"
                )
                break

            for entry in files:
                entry_mime = entry.get("mimeType", "") or entry.get("mime_type", "")

                if entry_mime == "application/vnd.google-apps.folder":
                    # Descend one level, charging what we have so far
                    # against the shared budget.
                    nested = await _fetch_folder_files_recursively(
                        composio_connector,
                        entry.get("id"),
                        max_files=max_files,
                        current_count=current_count + len(collected),
                        depth=depth + 1,
                        max_depth=max_depth,
                    )
                    collected.extend(nested)
                else:
                    collected.append(entry)

                if current_count + len(collected) >= max_files:
                    break

            if not next_token:
                break
            token = next_token

        return collected[: max_files - current_count]

    except Exception as e:
        # Best-effort: return whatever was gathered before the failure.
        logger.error(f"Error in recursive folder fetch: {e!s}")
        return collected
+
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index 5af332760..5ad2266b7 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -46,6 +46,13 @@ logger = logging.getLogger(__name__)
router = APIRouter()
+# Map toolkit_id to frontend connector ID
+TOOLKIT_TO_FRONTEND_CONNECTOR_ID = {
+ "googledrive": "composio-googledrive",
+ "gmail": "composio-gmail",
+ "googlecalendar": "composio-googlecalendar",
+}
+
# Initialize security utilities
_state_manager = None
@@ -327,8 +334,12 @@ async def composio_callback(
await session.commit()
await session.refresh(existing_connector)
+ # Get the frontend connector ID based on toolkit_id
+ frontend_connector_id = TOOLKIT_TO_FRONTEND_CONNECTOR_ID.get(
+ toolkit_id, "composio-connector"
+ )
return RedirectResponse(
- url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&success=true&connector=composio-connector&connectorId={existing_connector.id}"
+ url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&success=true&connector={frontend_connector_id}&connectorId={existing_connector.id}"
)
try:
@@ -358,8 +369,12 @@ async def composio_callback(
f"Successfully created Composio connector {db_connector.id} for user {user_id}, toolkit {toolkit_id}"
)
+ # Get the frontend connector ID based on toolkit_id
+ frontend_connector_id = TOOLKIT_TO_FRONTEND_CONNECTOR_ID.get(
+ toolkit_id, "composio-connector"
+ )
return RedirectResponse(
- url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&success=true&connector=composio-connector&connectorId={db_connector.id}"
+ url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&success=true&connector={frontend_connector_id}&connectorId={db_connector.id}"
)
except IntegrityError as e:
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index 3810f03a4..3ea2d1bf2 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -53,6 +53,27 @@ TOOLKIT_TO_DOCUMENT_TYPE = {
"googlecalendar": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
}
+# Mapping of toolkit IDs to their indexer functions
+# Format: toolkit_id -> (module_path, function_name, supports_date_filter)
+# supports_date_filter: True if the indexer accepts start_date/end_date params
+TOOLKIT_TO_INDEXER = {
+ "googledrive": (
+ "app.connectors.composio_google_drive_connector",
+ "index_composio_google_drive",
+ False, # Google Drive doesn't use date filtering
+ ),
+ "gmail": (
+ "app.connectors.composio_gmail_connector",
+ "index_composio_gmail",
+ True, # Gmail uses date filtering
+ ),
+ "googlecalendar": (
+ "app.connectors.composio_google_calendar_connector",
+ "index_composio_google_calendar",
+ True, # Calendar uses date filtering
+ ),
+}
+
class ComposioService:
"""Service for interacting with Composio API."""
diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py
index 3eed8470e..f97652114 100644
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@@ -2,65 +2,39 @@
Composio connector indexer.
Routes indexing requests to toolkit-specific handlers (Google Drive, Gmail, Calendar).
+Uses a registry pattern for clean, extensible connector routing.
Note: This module is intentionally placed in app/tasks/ (not in connector_indexers/)
to avoid circular import issues with the connector_indexers package.
"""
import logging
-import os
-import tempfile
-from datetime import UTC, datetime
-from pathlib import Path
-from typing import Any
+from importlib import import_module
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
-from sqlalchemy.orm import selectinload
-from app.config import config
-from app.connectors.composio_connector import ComposioConnector
from app.db import (
- Document,
- DocumentType,
- Log,
SearchSourceConnector,
SearchSourceConnectorType,
)
-from app.services.composio_service import INDEXABLE_TOOLKITS, TOOLKIT_TO_DOCUMENT_TYPE
-from app.services.llm_service import get_user_long_context_llm
+from app.services.composio_service import INDEXABLE_TOOLKITS, TOOLKIT_TO_INDEXER
from app.services.task_logging_service import TaskLoggingService
-from app.tasks.connector_indexers.base import calculate_date_range
-from app.utils.document_converters import (
- create_document_chunks,
- generate_content_hash,
- generate_document_summary,
- generate_unique_identifier_hash,
-)
# Set up logging
logger = logging.getLogger(__name__)
-# ============ Utility functions (copied from connector_indexers.base to avoid circular imports) ============
+# Valid Composio connector types
+COMPOSIO_CONNECTOR_TYPES = {
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
+}
-def get_current_timestamp() -> datetime:
- """Get the current timestamp with timezone for updated_at field."""
- return datetime.now(UTC)
-
-
-async def check_document_by_unique_identifier(
- session: AsyncSession, unique_identifier_hash: str
-) -> Document | None:
- """Check if a document with the given unique identifier hash already exists."""
- existing_doc_result = await session.execute(
- select(Document)
- .options(selectinload(Document.chunks))
- .where(Document.unique_identifier_hash == unique_identifier_hash)
- )
- return existing_doc_result.scalars().first()
+# ============ Utility functions ============
async def get_connector_by_id(
@@ -78,312 +52,26 @@ async def get_connector_by_id(
return result.scalars().first()
-async def update_connector_last_indexed(
- session: AsyncSession,
- connector: SearchSourceConnector,
- update_last_indexed: bool = True,
-) -> None:
- """Update the last_indexed_at timestamp for a connector."""
- if update_last_indexed:
- connector.last_indexed_at = datetime.now(
- UTC
- ) # Use UTC for timezone consistency
- logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
-
-
-# Binary file extensions that need file processor
-BINARY_FILE_EXTENSIONS = {
- ".pdf",
- ".doc",
- ".docx",
- ".xls",
- ".xlsx",
- ".ppt",
- ".pptx",
- ".png",
- ".jpg",
- ".jpeg",
- ".gif",
- ".bmp",
- ".tiff",
- ".webp",
- ".zip",
- ".tar",
- ".gz",
- ".rar",
- ".7z",
- ".mp3",
- ".mp4",
- ".wav",
- ".avi",
- ".mov",
- ".exe",
- ".dll",
- ".so",
- ".bin",
-}
-
-# Text file extensions that can be decoded as UTF-8
-TEXT_FILE_EXTENSIONS = {
- ".txt",
- ".md",
- ".markdown",
- ".json",
- ".xml",
- ".html",
- ".htm",
- ".css",
- ".js",
- ".ts",
- ".py",
- ".java",
- ".c",
- ".cpp",
- ".h",
- ".yaml",
- ".yml",
- ".toml",
- ".ini",
- ".cfg",
- ".conf",
- ".sh",
- ".bash",
- ".zsh",
- ".fish",
- ".sql",
- ".csv",
- ".tsv",
- ".rst",
- ".tex",
- ".log",
-}
-
-
-def _is_binary_file(file_name: str, mime_type: str) -> bool:
- """Check if a file is binary based on extension or mime type."""
- extension = Path(file_name).suffix.lower()
-
- # Check extension first
- if extension in BINARY_FILE_EXTENSIONS:
- return True
- if extension in TEXT_FILE_EXTENSIONS:
- return False
-
- # Check mime type
- if mime_type:
- if mime_type.startswith(("image/", "audio/", "video/", "application/pdf")):
- return True
- if mime_type.startswith(("text/", "application/json", "application/xml")):
- return False
- # Office documents
- if (
- "spreadsheet" in mime_type
- or "document" in mime_type
- or "presentation" in mime_type
- ):
- return True
-
- # Default to text for unknown types
- return False
-
-
-async def _process_file_content(
- content: bytes | str,
- file_name: str,
- file_id: str,
- mime_type: str,
- search_space_id: int,
- user_id: str,
- session: AsyncSession,
- task_logger: TaskLoggingService,
- log_entry: Log,
- processing_errors: list[str],
-) -> str:
+def get_indexer_function(toolkit_id: str):
"""
- Process file content and return markdown text.
-
- For binary files (PDFs, images, etc.), uses Surfsense's ETL service.
- For text files, decodes as UTF-8.
+ Dynamically import and return the indexer function for a toolkit.
Args:
- content: File content as bytes or string
- file_name: Name of the file
- file_id: Google Drive file ID
- mime_type: MIME type of the file
- search_space_id: Search space ID
- user_id: User ID
- session: Database session
- task_logger: Task logging service
- log_entry: Log entry for tracking
- processing_errors: List to append errors to
+ toolkit_id: The toolkit ID (e.g., "googledrive", "gmail")
Returns:
- Markdown content string
+ Tuple of (indexer_function, supports_date_filter)
+
+ Raises:
+ ValueError: If toolkit not found in registry
"""
- # Ensure content is bytes
- if isinstance(content, str):
- content = content.encode("utf-8")
+ if toolkit_id not in TOOLKIT_TO_INDEXER:
+ raise ValueError(f"No indexer registered for toolkit: {toolkit_id}")
- # Check if this is a binary file
- if _is_binary_file(file_name, mime_type):
- # Use ETL service for binary files (PDF, Office docs, etc.)
- temp_file_path = None
- try:
- # Get file extension
- extension = Path(file_name).suffix or ".bin"
-
- # Write to temp file
- with tempfile.NamedTemporaryFile(
- delete=False, suffix=extension
- ) as tmp_file:
- tmp_file.write(content)
- temp_file_path = tmp_file.name
-
- # Use the configured ETL service to extract text
- extracted_text = await _extract_text_with_etl(
- temp_file_path, file_name, task_logger, log_entry
- )
-
- if extracted_text:
- return extracted_text
- else:
- # Fallback if extraction fails
- logger.warning(f"Could not extract text from binary file {file_name}")
- return f"# {file_name}\n\n[Binary file - text extraction failed]\n\n**File ID:** {file_id}\n**Type:** {mime_type}\n"
-
- except Exception as e:
- error_msg = f"Error processing binary file {file_name}: {e!s}"
- logger.error(error_msg)
- processing_errors.append(error_msg)
- return f"# {file_name}\n\n[Binary file - processing error]\n\n**File ID:** {file_id}\n**Type:** {mime_type}\n"
- finally:
- # Cleanup temp file
- if temp_file_path and os.path.exists(temp_file_path):
- try:
- os.unlink(temp_file_path)
- except Exception as e:
- logger.debug(f"Could not delete temp file {temp_file_path}: {e}")
- else:
- # Text file - try to decode as UTF-8
- try:
- return content.decode("utf-8")
- except UnicodeDecodeError:
- # Try other encodings
- for encoding in ["latin-1", "cp1252", "iso-8859-1"]:
- try:
- return content.decode(encoding)
- except UnicodeDecodeError:
- continue
-
- # If all encodings fail, treat as binary
- error_msg = f"Could not decode text file {file_name} with any encoding"
- logger.warning(error_msg)
- processing_errors.append(error_msg)
- return f"# {file_name}\n\n[File content could not be decoded]\n\n**File ID:** {file_id}\n**Type:** {mime_type}\n"
-
-
-async def _extract_text_with_etl(
- file_path: str,
- file_name: str,
- task_logger: TaskLoggingService,
- log_entry: Log,
-) -> str | None:
- """
- Extract text from a file using the configured ETL service.
-
- Args:
- file_path: Path to the file
- file_name: Name of the file
- task_logger: Task logging service
- log_entry: Log entry for tracking
-
- Returns:
- Extracted text as markdown, or None if extraction fails
- """
- import warnings
- from logging import ERROR, getLogger
-
- etl_service = config.ETL_SERVICE
-
- try:
- if etl_service == "UNSTRUCTURED":
- from langchain_unstructured import UnstructuredLoader
-
- from app.utils.document_converters import convert_document_to_markdown
-
- loader = UnstructuredLoader(
- file_path,
- mode="elements",
- post_processors=[],
- languages=["eng"],
- include_orig_elements=False,
- include_metadata=False,
- strategy="auto",
- )
-
- docs = await loader.aload()
- if docs:
- return await convert_document_to_markdown(docs)
- return None
-
- elif etl_service == "LLAMACLOUD":
- from app.tasks.document_processors.file_processors import (
- parse_with_llamacloud_retry,
- )
-
- # Estimate pages (rough estimate based on file size)
- file_size = os.path.getsize(file_path)
- estimated_pages = max(1, file_size // (80 * 1024))
-
- result = await parse_with_llamacloud_retry(
- file_path=file_path,
- estimated_pages=estimated_pages,
- task_logger=task_logger,
- log_entry=log_entry,
- )
-
- markdown_documents = await result.aget_markdown_documents(
- split_by_page=False
- )
- if markdown_documents:
- return markdown_documents[0].text
- return None
-
- elif etl_service == "DOCLING":
- from app.services.docling_service import create_docling_service
-
- docling_service = create_docling_service()
-
- # Suppress pdfminer warnings
- pdfminer_logger = getLogger("pdfminer")
- original_level = pdfminer_logger.level
-
- with warnings.catch_warnings():
- warnings.filterwarnings(
- "ignore", category=UserWarning, module="pdfminer"
- )
- warnings.filterwarnings(
- "ignore", message=".*Cannot set gray non-stroke color.*"
- )
- warnings.filterwarnings("ignore", message=".*invalid float value.*")
-
- pdfminer_logger.setLevel(ERROR)
-
- try:
- result = await docling_service.process_document(
- file_path, file_name
- )
- finally:
- pdfminer_logger.setLevel(original_level)
-
- return result.get("content")
- else:
- logger.warning(f"Unknown ETL service: {etl_service}")
- return None
-
- except Exception as e:
- logger.error(f"ETL extraction failed for {file_name}: {e!s}")
- return None
+ module_path, function_name, supports_date_filter = TOOLKIT_TO_INDEXER[toolkit_id]
+ module = import_module(module_path)
+ indexer_func = getattr(module, function_name)
+ return indexer_func, supports_date_filter
# ============ Main indexer function ============
@@ -403,6 +91,7 @@ async def index_composio_connector(
Index content from a Composio connector.
Routes to toolkit-specific indexing based on the connector's toolkit_id.
+ Uses a registry pattern for clean, extensible connector routing.
Args:
session: Database session
@@ -435,19 +124,10 @@ async def index_composio_connector(
try:
# Get connector by id - accept any Composio connector type
- # We'll check the actual type after loading
- connector = await get_connector_by_id(
- session,
- connector_id,
- None, # Don't filter by type, we'll validate after
- )
+ connector = await get_connector_by_id(session, connector_id, None)
# Validate it's a Composio connector
- if connector and connector.connector_type not in [
- SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
- SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
- SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
- ]:
+ if connector and connector.connector_type not in COMPOSIO_CONNECTOR_TYPES:
error_msg = f"Connector {connector_id} is not a Composio connector"
await task_logger.log_task_failure(
log_entry, error_msg, {"error_type": "InvalidConnectorType"}
@@ -480,53 +160,35 @@ async def index_composio_connector(
)
return 0, error_msg
- # Route to toolkit-specific indexer
- if toolkit_id == "googledrive":
- return await _index_composio_google_drive(
- session=session,
- connector=connector,
- connector_id=connector_id,
- search_space_id=search_space_id,
- user_id=user_id,
- task_logger=task_logger,
- log_entry=log_entry,
- update_last_indexed=update_last_indexed,
- max_items=max_items,
- )
- elif toolkit_id == "gmail":
- return await _index_composio_gmail(
- session=session,
- connector=connector,
- connector_id=connector_id,
- search_space_id=search_space_id,
- user_id=user_id,
- start_date=start_date,
- end_date=end_date,
- task_logger=task_logger,
- log_entry=log_entry,
- update_last_indexed=update_last_indexed,
- max_items=max_items,
- )
- elif toolkit_id == "googlecalendar":
- return await _index_composio_google_calendar(
- session=session,
- connector=connector,
- connector_id=connector_id,
- search_space_id=search_space_id,
- user_id=user_id,
- start_date=start_date,
- end_date=end_date,
- task_logger=task_logger,
- log_entry=log_entry,
- update_last_indexed=update_last_indexed,
- max_items=max_items,
- )
- else:
- error_msg = f"No indexer implemented for toolkit: {toolkit_id}"
+ # Get indexer function from registry
+ try:
+ indexer_func, supports_date_filter = get_indexer_function(toolkit_id)
+ except ValueError as e:
await task_logger.log_task_failure(
- log_entry, error_msg, {"error_type": "NoIndexerImplemented"}
+ log_entry, str(e), {"error_type": "NoIndexerImplemented"}
)
- return 0, error_msg
+ return 0, str(e)
+
+ # Build kwargs for the indexer function
+ kwargs = {
+ "session": session,
+ "connector": connector,
+ "connector_id": connector_id,
+ "search_space_id": search_space_id,
+ "user_id": user_id,
+ "task_logger": task_logger,
+ "log_entry": log_entry,
+ "update_last_indexed": update_last_indexed,
+ "max_items": max_items,
+ }
+
+ # Add date params for toolkits that support them
+ if supports_date_filter:
+ kwargs["start_date"] = start_date
+ kwargs["end_date"] = end_date
+
+ # Call the toolkit-specific indexer
+ return await indexer_func(**kwargs)
except SQLAlchemyError as db_error:
await session.rollback()
@@ -548,1378 +210,3 @@ async def index_composio_connector(
)
logger.error(f"Failed to index Composio connector: {e!s}", exc_info=True)
return 0, f"Failed to index Composio connector: {e!s}"
-
-
-async def _index_composio_google_drive(
- session: AsyncSession,
- connector,
- connector_id: int,
- search_space_id: int,
- user_id: str,
- task_logger: TaskLoggingService,
- log_entry,
- update_last_indexed: bool = True,
- max_items: int = 1000,
-) -> tuple[int, str]:
- """Index Google Drive files via Composio with delta sync support.
-
- Delta Sync Flow:
- 1. First sync: Full scan + get initial page token
- 2. Subsequent syncs: Use LIST_CHANGES to process only changed files
-
- Supports folder/file selection via connector config:
- - selected_folders: List of {id, name} for folders to index
- - selected_files: List of {id, name} for individual files to index
- - indexing_options: {max_files_per_folder, incremental_sync, include_subfolders}
- """
- try:
- composio_connector = ComposioConnector(session, connector_id)
- connector_config = await composio_connector.get_config()
-
- # Get folder/file selection configuration
- selected_folders = connector_config.get("selected_folders", [])
- selected_files = connector_config.get("selected_files", [])
- indexing_options = connector_config.get("indexing_options", {})
-
- # Check for stored page token for delta sync
- stored_page_token = connector_config.get("drive_page_token")
- use_delta_sync = stored_page_token and connector.last_indexed_at
-
- max_files_per_folder = indexing_options.get("max_files_per_folder", 100)
- include_subfolders = indexing_options.get("include_subfolders", True)
-
- # Route to delta sync or full scan
- if use_delta_sync:
- logger.info(f"Using delta sync for Composio Google Drive connector {connector_id}")
- await task_logger.log_task_progress(
- log_entry,
- f"Starting delta sync for Google Drive via Composio (connector {connector_id})",
- {"stage": "delta_sync", "token": stored_page_token[:20] + "..."},
- )
-
- documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_delta_sync(
- session=session,
- composio_connector=composio_connector,
- connector_id=connector_id,
- search_space_id=search_space_id,
- user_id=user_id,
- page_token=stored_page_token,
- max_items=max_items,
- task_logger=task_logger,
- log_entry=log_entry,
- )
- else:
- logger.info(f"Using full scan for Composio Google Drive connector {connector_id} (first sync or no token)")
- await task_logger.log_task_progress(
- log_entry,
- f"Fetching Google Drive files via Composio for connector {connector_id}",
- {
- "stage": "full_scan",
- "selected_folders": len(selected_folders),
- "selected_files": len(selected_files),
- },
- )
-
- documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_full_scan(
- session=session,
- composio_connector=composio_connector,
- connector_id=connector_id,
- search_space_id=search_space_id,
- user_id=user_id,
- selected_folders=selected_folders,
- selected_files=selected_files,
- max_files_per_folder=max_files_per_folder,
- include_subfolders=include_subfolders,
- max_items=max_items,
- task_logger=task_logger,
- log_entry=log_entry,
- )
-
- # Get new page token for next sync (always update after successful sync)
- new_token, token_error = await composio_connector.get_drive_start_page_token()
- if new_token and not token_error:
- from sqlalchemy.orm.attributes import flag_modified
-
- # Refresh connector to avoid stale state
- await session.refresh(connector)
-
- if not connector.config:
- connector.config = {}
- connector.config["drive_page_token"] = new_token
- flag_modified(connector, "config")
- logger.info(f"Updated drive_page_token for connector {connector_id}")
- elif token_error:
- logger.warning(f"Failed to get new page token: {token_error}")
-
- # CRITICAL: Always update timestamp so Electric SQL syncs and UI shows indexed status
- await update_connector_last_indexed(session, connector, update_last_indexed)
-
- # Final commit
- logger.info(f"Final commit: Total {documents_indexed} Google Drive files processed")
- await session.commit()
- logger.info("Successfully committed all Composio Google Drive document changes to database")
-
- # Handle processing errors
- error_message = None
- if processing_errors:
- if len(processing_errors) == 1:
- error_message = processing_errors[0]
- else:
- error_message = f"Failed to process {len(processing_errors)} file(s). First error: {processing_errors[0]}"
- await task_logger.log_task_failure(
- log_entry,
- f"Completed Google Drive indexing with {len(processing_errors)} error(s) for connector {connector_id}",
- {
- "documents_indexed": documents_indexed,
- "documents_skipped": documents_skipped,
- "sync_type": "delta" if use_delta_sync else "full",
- "errors": processing_errors,
- },
- )
- else:
- await task_logger.log_task_success(
- log_entry,
- f"Successfully completed Google Drive indexing via Composio for connector {connector_id}",
- {
- "documents_indexed": documents_indexed,
- "documents_skipped": documents_skipped,
- "sync_type": "delta" if use_delta_sync else "full",
- },
- )
-
- return documents_indexed, error_message
-
- except Exception as e:
- logger.error(f"Failed to index Google Drive via Composio: {e!s}", exc_info=True)
- return 0, f"Failed to index Google Drive via Composio: {e!s}"
-
-
-async def _index_composio_drive_delta_sync(
- session: AsyncSession,
- composio_connector: ComposioConnector,
- connector_id: int,
- search_space_id: int,
- user_id: str,
- page_token: str,
- max_items: int,
- task_logger: TaskLoggingService,
- log_entry,
-) -> tuple[int, int, list[str]]:
- """Index Google Drive files using delta sync (only changed files).
-
- Uses GOOGLEDRIVE_LIST_CHANGES to fetch only files that changed since last sync.
- Handles: new files, modified files, and deleted files.
- """
- documents_indexed = 0
- documents_skipped = 0
- processing_errors = []
-
- # Fetch all changes with pagination
- all_changes = []
- current_token = page_token
-
- while len(all_changes) < max_items:
- changes, next_token, error = await composio_connector.list_drive_changes(
- page_token=current_token,
- page_size=100,
- include_removed=True,
- )
-
- if error:
- logger.error(f"Error fetching Drive changes: {error}")
- processing_errors.append(f"Failed to fetch changes: {error}")
- break
-
- all_changes.extend(changes)
-
- if not next_token or next_token == current_token:
- break
- current_token = next_token
-
- if not all_changes:
- logger.info("No changes detected since last sync")
- return 0, 0, []
-
- logger.info(f"Processing {len(all_changes)} changes from delta sync")
-
- for change in all_changes[:max_items]:
- try:
- # Handle removed files
- is_removed = change.get("removed", False)
- file_info = change.get("file", {})
- file_id = change.get("fileId") or file_info.get("id", "")
-
- if not file_id:
- documents_skipped += 1
- continue
-
- # Check if file was trashed or removed
- if is_removed or file_info.get("trashed", False):
- # Remove document from database
- document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
- unique_identifier_hash = generate_unique_identifier_hash(
- document_type, f"drive_{file_id}", search_space_id
- )
- existing_document = await check_document_by_unique_identifier(
- session, unique_identifier_hash
- )
- if existing_document:
- await session.delete(existing_document)
- documents_indexed += 1
- logger.info(f"Deleted document for removed/trashed file: {file_id}")
- continue
-
- # Process changed file
- file_name = file_info.get("name", "") or "Untitled"
- mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
-
- # Skip folders
- if mime_type == "application/vnd.google-apps.folder":
- continue
-
- # Process the file
- indexed, skipped, errors = await _process_single_drive_file(
- session=session,
- composio_connector=composio_connector,
- file_id=file_id,
- file_name=file_name,
- mime_type=mime_type,
- connector_id=connector_id,
- search_space_id=search_space_id,
- user_id=user_id,
- task_logger=task_logger,
- log_entry=log_entry,
- )
-
- documents_indexed += indexed
- documents_skipped += skipped
- processing_errors.extend(errors)
-
- # Batch commit every 10 documents
- if documents_indexed > 0 and documents_indexed % 10 == 0:
- await session.commit()
- logger.info(f"Committed batch: {documents_indexed} changes processed")
-
- except Exception as e:
- error_msg = f"Error processing change for file {file_id}: {e!s}"
- logger.error(error_msg, exc_info=True)
- processing_errors.append(error_msg)
- documents_skipped += 1
-
- logger.info(f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped")
- return documents_indexed, documents_skipped, processing_errors
-
-
-async def _index_composio_drive_full_scan(
- session: AsyncSession,
- composio_connector: ComposioConnector,
- connector_id: int,
- search_space_id: int,
- user_id: str,
- selected_folders: list[dict],
- selected_files: list[dict],
- max_files_per_folder: int,
- include_subfolders: bool,
- max_items: int,
- task_logger: TaskLoggingService,
- log_entry,
-) -> tuple[int, int, list[str]]:
- """Index Google Drive files using full scan (first sync or when no delta token)."""
- documents_indexed = 0
- documents_skipped = 0
- processing_errors = []
-
- all_files = []
-
- # If specific folders/files are selected, fetch from those
- if selected_folders or selected_files:
- # Fetch files from selected folders
- for folder in selected_folders:
- folder_id = folder.get("id")
- folder_name = folder.get("name", "Unknown")
-
- if not folder_id:
- continue
-
- # Handle special case for "root" folder
- actual_folder_id = None if folder_id == "root" else folder_id
-
- logger.info(f"Fetching files from folder: {folder_name} ({folder_id})")
-
- # Fetch files from this folder
- folder_files = []
- page_token = None
-
- while len(folder_files) < max_files_per_folder:
- (
- files,
- next_token,
- error,
- ) = await composio_connector.list_drive_files(
- folder_id=actual_folder_id,
- page_token=page_token,
- page_size=min(100, max_files_per_folder - len(folder_files)),
- )
-
- if error:
- logger.warning(
- f"Failed to fetch files from folder {folder_name}: {error}"
- )
- break
-
- # Process files
- for file_info in files:
- mime_type = file_info.get("mimeType", "") or file_info.get(
- "mime_type", ""
- )
-
- # If it's a folder and include_subfolders is enabled, recursively fetch
- if mime_type == "application/vnd.google-apps.folder":
- if include_subfolders:
- # Add subfolder files recursively
- subfolder_files = await _fetch_folder_files_recursively(
- composio_connector,
- file_info.get("id"),
- max_files=max_files_per_folder,
- current_count=len(folder_files),
- )
- folder_files.extend(subfolder_files)
- else:
- folder_files.append(file_info)
-
- if not next_token:
- break
- page_token = next_token
-
- all_files.extend(folder_files[:max_files_per_folder])
- logger.info(f"Found {len(folder_files)} files in folder {folder_name}")
-
- # Add specifically selected files
- for selected_file in selected_files:
- file_id = selected_file.get("id")
- file_name = selected_file.get("name", "Unknown")
-
- if not file_id:
- continue
-
- # Add file info (we'll fetch content later during indexing)
- all_files.append(
- {
- "id": file_id,
- "name": file_name,
- "mimeType": "", # Will be determined later
- }
- )
- else:
- # No selection specified - fetch all files (original behavior)
- page_token = None
-
- while len(all_files) < max_items:
- files, next_token, error = await composio_connector.list_drive_files(
- page_token=page_token,
- page_size=min(100, max_items - len(all_files)),
- )
-
- if error:
- return 0, 0, [f"Failed to fetch Drive files: {error}"]
-
- all_files.extend(files)
-
- if not next_token:
- break
- page_token = next_token
-
- if not all_files:
- logger.info("No Google Drive files found")
- return 0, 0, []
-
- logger.info(f"Found {len(all_files)} Google Drive files to index via Composio (full scan)")
-
- for file_info in all_files:
- try:
- # Handle both standard Google API and potential Composio variations
- file_id = file_info.get("id", "") or file_info.get("fileId", "")
- file_name = (
- file_info.get("name", "")
- or file_info.get("fileName", "")
- or "Untitled"
- )
- mime_type = file_info.get("mimeType", "") or file_info.get(
- "mime_type", ""
- )
-
- if not file_id:
- documents_skipped += 1
- continue
-
- # Skip folders
- if mime_type == "application/vnd.google-apps.folder":
- continue
-
- # Process the file
- indexed, skipped, errors = await _process_single_drive_file(
- session=session,
- composio_connector=composio_connector,
- file_id=file_id,
- file_name=file_name,
- mime_type=mime_type,
- connector_id=connector_id,
- search_space_id=search_space_id,
- user_id=user_id,
- task_logger=task_logger,
- log_entry=log_entry,
- )
-
- documents_indexed += indexed
- documents_skipped += skipped
- processing_errors.extend(errors)
-
- # Batch commit every 10 documents
- if documents_indexed > 0 and documents_indexed % 10 == 0:
- logger.info(f"Committing batch: {documents_indexed} Google Drive files processed so far")
- await session.commit()
-
- except Exception as e:
- error_msg = f"Error processing Drive file {file_name or 'unknown'}: {e!s}"
- logger.error(error_msg, exc_info=True)
- processing_errors.append(error_msg)
- documents_skipped += 1
-
- logger.info(f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped")
- return documents_indexed, documents_skipped, processing_errors
-
-
-async def _process_single_drive_file(
- session: AsyncSession,
- composio_connector: ComposioConnector,
- file_id: str,
- file_name: str,
- mime_type: str,
- connector_id: int,
- search_space_id: int,
- user_id: str,
- task_logger: TaskLoggingService,
- log_entry,
-) -> tuple[int, int, list[str]]:
- """Process a single Google Drive file for indexing.
-
- Returns:
- Tuple of (documents_indexed, documents_skipped, processing_errors)
- """
- processing_errors = []
-
- # Generate unique identifier hash
- document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
- unique_identifier_hash = generate_unique_identifier_hash(
- document_type, f"drive_{file_id}", search_space_id
- )
-
- # Check if document exists
- existing_document = await check_document_by_unique_identifier(
- session, unique_identifier_hash
- )
-
- # Get file content
- content, content_error = await composio_connector.get_drive_file_content(file_id)
-
- if content_error or not content:
- logger.warning(
- f"Could not get content for file {file_name}: {content_error}"
- )
- # Use metadata as content fallback
- markdown_content = f"# {file_name}\n\n"
- markdown_content += f"**File ID:** {file_id}\n"
- markdown_content += f"**Type:** {mime_type}\n"
- elif isinstance(content, dict):
- # Safety check: if content is still a dict, log error and use fallback
- error_msg = f"Unexpected dict content format for file {file_name}: {list(content.keys())}"
- logger.error(error_msg)
- processing_errors.append(error_msg)
- markdown_content = f"# {file_name}\n\n"
- markdown_content += f"**File ID:** {file_id}\n"
- markdown_content += f"**Type:** {mime_type}\n"
- else:
- # Process content based on file type
- markdown_content = await _process_file_content(
- content=content,
- file_name=file_name,
- file_id=file_id,
- mime_type=mime_type,
- search_space_id=search_space_id,
- user_id=user_id,
- session=session,
- task_logger=task_logger,
- log_entry=log_entry,
- processing_errors=processing_errors,
- )
-
- content_hash = generate_content_hash(markdown_content, search_space_id)
-
- if existing_document:
- if existing_document.content_hash == content_hash:
- return 0, 1, processing_errors # Skipped
-
- # Update existing document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "file_id": file_id,
- "file_name": file_name,
- "mime_type": mime_type,
- "document_type": "Google Drive File (Composio)",
- }
- (
- summary_content,
- summary_embedding,
- ) = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = (
- f"Google Drive File: {file_name}\n\nType: {mime_type}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
-
- chunks = await create_document_chunks(markdown_content)
-
- existing_document.title = f"Drive: {file_name}"
- existing_document.content = summary_content
- existing_document.content_hash = content_hash
- existing_document.embedding = summary_embedding
- existing_document.document_metadata = {
- "file_id": file_id,
- "file_name": file_name,
- "FILE_NAME": file_name, # For compatibility
- "mime_type": mime_type,
- "connector_id": connector_id,
- "source": "composio",
- }
- existing_document.chunks = chunks
- existing_document.updated_at = get_current_timestamp()
-
- return 1, 0, processing_errors # Indexed
-
- # Create new document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "file_id": file_id,
- "file_name": file_name,
- "mime_type": mime_type,
- "document_type": "Google Drive File (Composio)",
- }
- (
- summary_content,
- summary_embedding,
- ) = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = (
- f"Google Drive File: {file_name}\n\nType: {mime_type}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
-
- chunks = await create_document_chunks(markdown_content)
-
- document = Document(
- search_space_id=search_space_id,
- title=f"Drive: {file_name}",
- document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
- document_metadata={
- "file_id": file_id,
- "file_name": file_name,
- "FILE_NAME": file_name, # For compatibility
- "mime_type": mime_type,
- "connector_id": connector_id,
- "toolkit_id": "googledrive",
- "source": "composio",
- },
- content=summary_content,
- content_hash=content_hash,
- unique_identifier_hash=unique_identifier_hash,
- embedding=summary_embedding,
- chunks=chunks,
- updated_at=get_current_timestamp(),
- )
- session.add(document)
-
- return 1, 0, processing_errors # Indexed
-
-
-async def _fetch_folder_files_recursively(
- composio_connector: ComposioConnector,
- folder_id: str,
- max_files: int = 100,
- current_count: int = 0,
- depth: int = 0,
- max_depth: int = 10,
-) -> list[dict[str, Any]]:
- """
- Recursively fetch files from a Google Drive folder via Composio.
-
- Args:
- composio_connector: The Composio connector instance
- folder_id: Google Drive folder ID
- max_files: Maximum number of files to fetch
- current_count: Current number of files already fetched
- depth: Current recursion depth
- max_depth: Maximum recursion depth to prevent infinite loops
-
- Returns:
- List of file info dictionaries
- """
- if depth >= max_depth:
- logger.warning(f"Max recursion depth reached for folder {folder_id}")
- return []
-
- if current_count >= max_files:
- return []
-
- all_files = []
- page_token = None
-
- try:
- while len(all_files) + current_count < max_files:
- files, next_token, error = await composio_connector.list_drive_files(
- folder_id=folder_id,
- page_token=page_token,
- page_size=min(100, max_files - len(all_files) - current_count),
- )
-
- if error:
- logger.warning(
- f"Error fetching files from subfolder {folder_id}: {error}"
- )
- break
-
- for file_info in files:
- mime_type = file_info.get("mimeType", "") or file_info.get(
- "mime_type", ""
- )
-
- if mime_type == "application/vnd.google-apps.folder":
- # Recursively fetch from subfolders
- subfolder_files = await _fetch_folder_files_recursively(
- composio_connector,
- file_info.get("id"),
- max_files=max_files,
- current_count=current_count + len(all_files),
- depth=depth + 1,
- max_depth=max_depth,
- )
- all_files.extend(subfolder_files)
- else:
- all_files.append(file_info)
-
- if len(all_files) + current_count >= max_files:
- break
-
- if not next_token:
- break
- page_token = next_token
-
- return all_files[: max_files - current_count]
-
- except Exception as e:
- logger.error(f"Error in recursive folder fetch: {e!s}")
- return all_files
-
-
-async def _process_gmail_message_batch(
- session: AsyncSession,
- messages: list[dict[str, Any]],
- composio_connector: ComposioConnector,
- connector_id: int,
- search_space_id: int,
- user_id: str,
- total_documents_indexed: int = 0,
-) -> tuple[int, int]:
- """
- Process a batch of Gmail messages and index them.
-
- Args:
- total_documents_indexed: Running total of documents indexed so far (for batch commits).
-
- Returns:
- Tuple of (documents_indexed, documents_skipped)
- """
- documents_indexed = 0
- documents_skipped = 0
-
- for message in messages:
- try:
- # Composio uses 'messageId' (camelCase), not 'id'
- message_id = message.get("messageId", "") or message.get("id", "")
- if not message_id:
- documents_skipped += 1
- continue
-
- # Composio's GMAIL_FETCH_EMAILS already returns full message content
- # No need for a separate detail API call
-
- # Extract message info from Composio response
- # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
- payload = message.get("payload", {})
- headers = payload.get("headers", [])
-
- subject = "No Subject"
- sender = "Unknown Sender"
- date_str = message.get("messageTimestamp", "Unknown Date")
-
- for header in headers:
- name = header.get("name", "").lower()
- value = header.get("value", "")
- if name == "subject":
- subject = value
- elif name == "from":
- sender = value
- elif name == "date":
- date_str = value
-
- # Format to markdown using the full message data
- markdown_content = composio_connector.format_gmail_message_to_markdown(
- message
- )
-
- # Check for empty content (defensive parsing per Composio best practices)
- if not markdown_content.strip():
- logger.warning(f"Skipping Gmail message with no content: {subject}")
- documents_skipped += 1
- continue
-
- # Generate unique identifier
- document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"])
- unique_identifier_hash = generate_unique_identifier_hash(
- document_type, f"gmail_{message_id}", search_space_id
- )
-
- content_hash = generate_content_hash(markdown_content, search_space_id)
-
- existing_document = await check_document_by_unique_identifier(
- session, unique_identifier_hash
- )
-
- # Get label IDs from Composio response
- label_ids = message.get("labelIds", [])
- # Extract thread_id if available (for consistency with non-Composio implementation)
- thread_id = message.get("threadId", "") or message.get("thread_id", "")
-
- if existing_document:
- if existing_document.content_hash == content_hash:
- documents_skipped += 1
- continue
-
- # Update existing
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "message_id": message_id,
- "thread_id": thread_id,
- "subject": subject,
- "sender": sender,
- "document_type": "Gmail Message (Composio)",
- }
- (
- summary_content,
- summary_embedding,
- ) = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = (
- f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
-
- chunks = await create_document_chunks(markdown_content)
-
- existing_document.title = f"Gmail: {subject}"
- existing_document.content = summary_content
- existing_document.content_hash = content_hash
- existing_document.embedding = summary_embedding
- existing_document.document_metadata = {
- "message_id": message_id,
- "thread_id": thread_id,
- "subject": subject,
- "sender": sender,
- "date": date_str,
- "labels": label_ids,
- "connector_id": connector_id,
- "source": "composio",
- }
- existing_document.chunks = chunks
- existing_document.updated_at = get_current_timestamp()
-
- documents_indexed += 1
-
- # Batch commit every 10 documents
- current_total = total_documents_indexed + documents_indexed
- if current_total % 10 == 0:
- logger.info(
- f"Committing batch: {current_total} Gmail messages processed so far"
- )
- await session.commit()
- continue
-
- # Create new document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "message_id": message_id,
- "thread_id": thread_id,
- "subject": subject,
- "sender": sender,
- "document_type": "Gmail Message (Composio)",
- }
- summary_content, summary_embedding = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = (
- f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
-
- chunks = await create_document_chunks(markdown_content)
-
- document = Document(
- search_space_id=search_space_id,
- title=f"Gmail: {subject}",
- document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
- document_metadata={
- "message_id": message_id,
- "thread_id": thread_id,
- "subject": subject,
- "sender": sender,
- "date": date_str,
- "labels": label_ids,
- "connector_id": connector_id,
- "toolkit_id": "gmail",
- "source": "composio",
- },
- content=summary_content,
- content_hash=content_hash,
- unique_identifier_hash=unique_identifier_hash,
- embedding=summary_embedding,
- chunks=chunks,
- updated_at=get_current_timestamp(),
- )
- session.add(document)
- documents_indexed += 1
-
- # Batch commit every 10 documents
- current_total = total_documents_indexed + documents_indexed
- if current_total % 10 == 0:
- logger.info(
- f"Committing batch: {current_total} Gmail messages processed so far"
- )
- await session.commit()
-
- except Exception as e:
- logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
- documents_skipped += 1
- # Rollback on error to avoid partial state (per Composio best practices)
- try:
- await session.rollback()
- except Exception as rollback_error:
- logger.error(
- f"Error during rollback: {rollback_error!s}", exc_info=True
- )
- continue
-
- return documents_indexed, documents_skipped
-
-
-async def _index_composio_gmail(
- session: AsyncSession,
- connector,
- connector_id: int,
- search_space_id: int,
- user_id: str,
- start_date: str | None,
- end_date: str | None,
- task_logger: TaskLoggingService,
- log_entry,
- update_last_indexed: bool = True,
- max_items: int = 1000,
-) -> tuple[int, str]:
- """Index Gmail messages via Composio with pagination and incremental processing."""
- try:
- composio_connector = ComposioConnector(session, connector_id)
-
- # Normalize date values - handle "undefined" strings from frontend
- if start_date == "undefined" or start_date == "":
- start_date = None
- if end_date == "undefined" or end_date == "":
- end_date = None
-
- # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
- # This ensures user-selected dates are respected (matching non-Composio Gmail connector behavior)
- if start_date is not None and end_date is not None:
- # User provided both dates - use them directly
- start_date_str = start_date
- end_date_str = end_date
- else:
- # Calculate date range with defaults (uses last_indexed_at or 365 days back)
- # This ensures indexing works even when user doesn't specify dates
- start_date_str, end_date_str = calculate_date_range(
- connector, start_date, end_date, default_days_back=365
- )
-
- # Build query with date range
- query_parts = []
- if start_date_str:
- query_parts.append(f"after:{start_date_str.replace('-', '/')}")
- if end_date_str:
- query_parts.append(f"before:{end_date_str.replace('-', '/')}")
- query = " ".join(query_parts) if query_parts else ""
-
- logger.info(
- f"Gmail query for connector {connector_id}: '{query}' "
- f"(start_date={start_date_str}, end_date={end_date_str})"
- )
-
- # Use smaller batch size to avoid 413 payload too large errors
- batch_size = 50
- page_token = None
- total_documents_indexed = 0
- total_documents_skipped = 0
- total_messages_fetched = 0
- result_size_estimate = None # Will be set from first API response
-
- while total_messages_fetched < max_items:
- # Calculate how many messages to fetch in this batch
- remaining = max_items - total_messages_fetched
- current_batch_size = min(batch_size, remaining)
-
- # Use result_size_estimate if available, otherwise fall back to max_items
- estimated_total = (
- result_size_estimate if result_size_estimate is not None else max_items
- )
- # Cap estimated_total at max_items to avoid showing misleading progress
- estimated_total = min(estimated_total, max_items)
-
- await task_logger.log_task_progress(
- log_entry,
- f"Fetching Gmail messages batch via Composio for connector {connector_id} "
- f"({total_messages_fetched}/{estimated_total} fetched, {total_documents_indexed} indexed)",
- {
- "stage": "fetching_messages",
- "batch_size": current_batch_size,
- "total_fetched": total_messages_fetched,
- "total_indexed": total_documents_indexed,
- "estimated_total": estimated_total,
- },
- )
-
- # Fetch batch of messages
- (
- messages,
- next_token,
- result_size_estimate_batch,
- error,
- ) = await composio_connector.list_gmail_messages(
- query=query,
- max_results=current_batch_size,
- page_token=page_token,
- )
-
- if error:
- await task_logger.log_task_failure(
- log_entry, f"Failed to fetch Gmail messages: {error}", {}
- )
- return 0, f"Failed to fetch Gmail messages: {error}"
-
- if not messages:
- # No more messages available
- break
-
- # Update result_size_estimate from first response (Gmail provides this estimate)
- if result_size_estimate is None and result_size_estimate_batch is not None:
- result_size_estimate = result_size_estimate_batch
- logger.info(
- f"Gmail API estimated {result_size_estimate} total messages for query: '{query}'"
- )
-
- total_messages_fetched += len(messages)
- # Recalculate estimated_total after potentially updating result_size_estimate
- estimated_total = (
- result_size_estimate if result_size_estimate is not None else max_items
- )
- estimated_total = min(estimated_total, max_items)
-
- logger.info(
- f"Fetched batch of {len(messages)} Gmail messages "
- f"(total: {total_messages_fetched}/{estimated_total})"
- )
-
- # Process batch incrementally
- batch_indexed, batch_skipped = await _process_gmail_message_batch(
- session=session,
- messages=messages,
- composio_connector=composio_connector,
- connector_id=connector_id,
- search_space_id=search_space_id,
- user_id=user_id,
- total_documents_indexed=total_documents_indexed,
- )
-
- total_documents_indexed += batch_indexed
- total_documents_skipped += batch_skipped
-
- logger.info(
- f"Processed batch: {batch_indexed} indexed, {batch_skipped} skipped "
- f"(total: {total_documents_indexed} indexed, {total_documents_skipped} skipped)"
- )
-
- # Batch commits happen in _process_gmail_message_batch every 10 documents
- # This ensures progress is saved incrementally, preventing data loss on crashes
-
- # Check if we should continue
- if not next_token:
- # No more pages available
- break
-
- if len(messages) < current_batch_size:
- # Last page had fewer items than requested, we're done
- break
-
- # Continue with next page
- page_token = next_token
-
- if total_messages_fetched == 0:
- success_msg = "No Gmail messages found in the specified date range"
- await task_logger.log_task_success(
- log_entry, success_msg, {"messages_count": 0}
- )
- # CRITICAL: Update timestamp even when no messages found so Electric SQL syncs and UI shows indexed status
- await update_connector_last_indexed(session, connector, update_last_indexed)
- await session.commit()
- return 0, None # Return None (not error) when no items found
-
- # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
- # This ensures the UI shows "Last indexed" instead of "Never indexed"
- await update_connector_last_indexed(session, connector, update_last_indexed)
-
- # Final commit to ensure all documents are persisted (safety net)
- # This matches the pattern used in non-Composio Gmail indexer
- logger.info(
- f"Final commit: Total {total_documents_indexed} Gmail messages processed"
- )
- await session.commit()
- logger.info(
- "Successfully committed all Composio Gmail document changes to database"
- )
-
- await task_logger.log_task_success(
- log_entry,
- f"Successfully completed Gmail indexing via Composio for connector {connector_id}",
- {
- "documents_indexed": total_documents_indexed,
- "documents_skipped": total_documents_skipped,
- "messages_fetched": total_messages_fetched,
- },
- )
-
- return total_documents_indexed, None
-
- except Exception as e:
- logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
- return 0, f"Failed to index Gmail via Composio: {e!s}"
-
-
-async def _index_composio_google_calendar(
- session: AsyncSession,
- connector,
- connector_id: int,
- search_space_id: int,
- user_id: str,
- start_date: str | None,
- end_date: str | None,
- task_logger: TaskLoggingService,
- log_entry,
- update_last_indexed: bool = True,
- max_items: int = 2500,
-) -> tuple[int, str]:
- """Index Google Calendar events via Composio."""
- try:
- composio_connector = ComposioConnector(session, connector_id)
-
- await task_logger.log_task_progress(
- log_entry,
- f"Fetching Google Calendar events via Composio for connector {connector_id}",
- {"stage": "fetching_events"},
- )
-
- # Normalize date values - handle "undefined" strings from frontend
- if start_date == "undefined" or start_date == "":
- start_date = None
- if end_date == "undefined" or end_date == "":
- end_date = None
-
- # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
- # This ensures user-selected dates are respected (matching non-Composio Calendar connector behavior)
- if start_date is not None and end_date is not None:
- # User provided both dates - use them directly
- start_date_str = start_date
- end_date_str = end_date
- else:
- # Calculate date range with defaults (uses last_indexed_at or 365 days back)
- # This ensures indexing works even when user doesn't specify dates
- start_date_str, end_date_str = calculate_date_range(
- connector, start_date, end_date, default_days_back=365
- )
-
- # Build time range for API call
- time_min = f"{start_date_str}T00:00:00Z"
- time_max = f"{end_date_str}T23:59:59Z"
-
- logger.info(
- f"Google Calendar query for connector {connector_id}: "
- f"(start_date={start_date_str}, end_date={end_date_str})"
- )
-
- events, error = await composio_connector.list_calendar_events(
- time_min=time_min,
- time_max=time_max,
- max_results=max_items,
- )
-
- if error:
- await task_logger.log_task_failure(
- log_entry, f"Failed to fetch Calendar events: {error}", {}
- )
- return 0, f"Failed to fetch Calendar events: {error}"
-
- if not events:
- success_msg = "No Google Calendar events found in the specified date range"
- await task_logger.log_task_success(
- log_entry, success_msg, {"events_count": 0}
- )
- # CRITICAL: Update timestamp even when no events found so Electric SQL syncs and UI shows indexed status
- await update_connector_last_indexed(session, connector, update_last_indexed)
- await session.commit()
- return (
- 0,
- None,
- ) # Return None (not error) when no items found - this is success with 0 items
-
- logger.info(f"Found {len(events)} Google Calendar events to index via Composio")
-
- documents_indexed = 0
- documents_skipped = 0
-
- for event in events:
- try:
- # Handle both standard Google API and potential Composio variations
- event_id = event.get("id", "") or event.get("eventId", "")
- summary = (
- event.get("summary", "") or event.get("title", "") or "No Title"
- )
-
- if not event_id:
- documents_skipped += 1
- continue
-
- # Format to markdown
- markdown_content = composio_connector.format_calendar_event_to_markdown(
- event
- )
-
- # Generate unique identifier
- document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"])
- unique_identifier_hash = generate_unique_identifier_hash(
- document_type, f"calendar_{event_id}", search_space_id
- )
-
- content_hash = generate_content_hash(markdown_content, search_space_id)
-
- existing_document = await check_document_by_unique_identifier(
- session, unique_identifier_hash
- )
-
- # Extract event times
- start = event.get("start", {})
- end = event.get("end", {})
- start_time = start.get("dateTime") or start.get("date", "")
- end_time = end.get("dateTime") or end.get("date", "")
- location = event.get("location", "")
-
- if existing_document:
- if existing_document.content_hash == content_hash:
- documents_skipped += 1
- continue
-
- # Update existing
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "event_id": event_id,
- "summary": summary,
- "start_time": start_time,
- "document_type": "Google Calendar Event (Composio)",
- }
- (
- summary_content,
- summary_embedding,
- ) = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
- if location:
- summary_content += f"\nLocation: {location}"
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
-
- chunks = await create_document_chunks(markdown_content)
-
- existing_document.title = f"Calendar: {summary}"
- existing_document.content = summary_content
- existing_document.content_hash = content_hash
- existing_document.embedding = summary_embedding
- existing_document.document_metadata = {
- "event_id": event_id,
- "summary": summary,
- "start_time": start_time,
- "end_time": end_time,
- "location": location,
- "connector_id": connector_id,
- "source": "composio",
- }
- existing_document.chunks = chunks
- existing_document.updated_at = get_current_timestamp()
-
- documents_indexed += 1
-
- # Batch commit every 10 documents
- if documents_indexed % 10 == 0:
- logger.info(
- f"Committing batch: {documents_indexed} Google Calendar events processed so far"
- )
- await session.commit()
- continue
-
- # Create new document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "event_id": event_id,
- "summary": summary,
- "start_time": start_time,
- "document_type": "Google Calendar Event (Composio)",
- }
- (
- summary_content,
- summary_embedding,
- ) = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = (
- f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
- )
- if location:
- summary_content += f"\nLocation: {location}"
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
-
- chunks = await create_document_chunks(markdown_content)
-
- document = Document(
- search_space_id=search_space_id,
- title=f"Calendar: {summary}",
- document_type=DocumentType(
- TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"]
- ),
- document_metadata={
- "event_id": event_id,
- "summary": summary,
- "start_time": start_time,
- "end_time": end_time,
- "location": location,
- "connector_id": connector_id,
- "toolkit_id": "googlecalendar",
- "source": "composio",
- },
- content=summary_content,
- content_hash=content_hash,
- unique_identifier_hash=unique_identifier_hash,
- embedding=summary_embedding,
- chunks=chunks,
- updated_at=get_current_timestamp(),
- )
- session.add(document)
- documents_indexed += 1
-
- # Batch commit every 10 documents
- if documents_indexed % 10 == 0:
- logger.info(
- f"Committing batch: {documents_indexed} Google Calendar events processed so far"
- )
- await session.commit()
-
- except Exception as e:
- logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
- documents_skipped += 1
- continue
-
- # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
- # This ensures the UI shows "Last indexed" instead of "Never indexed"
- await update_connector_last_indexed(session, connector, update_last_indexed)
-
- # Final commit to ensure all documents are persisted (safety net)
- # This matches the pattern used in non-Composio Gmail indexer
- logger.info(
- f"Final commit: Total {documents_indexed} Google Calendar events processed"
- )
- await session.commit()
- logger.info(
- "Successfully committed all Composio Google Calendar document changes to database"
- )
-
- await task_logger.log_task_success(
- log_entry,
- f"Successfully completed Google Calendar indexing via Composio for connector {connector_id}",
- {
- "documents_indexed": documents_indexed,
- "documents_skipped": documents_skipped,
- },
- )
-
- return documents_indexed, None
-
- except Exception as e:
- logger.error(
- f"Failed to index Google Calendar via Composio: {e!s}", exc_info=True
- )
- return 0, f"Failed to index Google Calendar via Composio: {e!s}"
diff --git a/surfsense_web/components/assistant-ui/connector-popup/components/composio-connector-card.tsx b/surfsense_web/components/assistant-ui/connector-popup/components/composio-connector-card.tsx
deleted file mode 100644
index 671fc3ce6..000000000
--- a/surfsense_web/components/assistant-ui/connector-popup/components/composio-connector-card.tsx
+++ /dev/null
@@ -1,78 +0,0 @@
-"use client";
-
-import { Zap } from "lucide-react";
-import Image from "next/image";
-import type { FC } from "react";
-import { Button } from "@/components/ui/button";
-import { cn } from "@/lib/utils";
-
-interface ComposioConnectorCardProps {
- id: string;
- title: string;
- description: string;
- connectorCount?: number;
- onConnect: () => void;
-}
-
-export const ComposioConnectorCard: FC = ({
- id,
- title,
- description,
- connectorCount = 0,
- onConnect,
-}) => {
- const hasConnections = connectorCount > 0;
-
- return (
-
-
-
-
-
-
- {title}
-
-
- {hasConnections ? (
-
-
- {connectorCount} {connectorCount === 1 ? "connection" : "connections"}
-
-
- ) : (
-
{description}
- )}
-
-
- {hasConnections ? "Manage" : "Browse"}
-
-
- );
-};
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
new file mode 100644
index 000000000..6e7a06073
--- /dev/null
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
@@ -0,0 +1,220 @@
+"use client";
+
+import { Calendar, Clock } from "lucide-react";
+import type { FC } from "react";
+import { useEffect, useState } from "react";
+import { Label } from "@/components/ui/label";
+import {
+ Select,
+ SelectContent,
+ SelectItem,
+ SelectTrigger,
+ SelectValue,
+} from "@/components/ui/select";
+import { Switch } from "@/components/ui/switch";
+import type { SearchSourceConnector } from "@/contracts/types/connector.types";
+
+interface ComposioCalendarConfigProps {
+ connector: SearchSourceConnector;
+ onConfigChange?: (config: Record) => void;
+ onNameChange?: (name: string) => void;
+}
+
+interface CalendarIndexingOptions {
+ max_events: number;
+ include_recurring: boolean;
+ include_past_events: boolean;
+ days_ahead: number;
+}
+
+const DEFAULT_CALENDAR_OPTIONS: CalendarIndexingOptions = {
+ max_events: 500,
+ include_recurring: true,
+ include_past_events: true,
+ days_ahead: 365,
+};
+
+export const ComposioCalendarConfig: FC = ({ connector, onConfigChange }) => {
+ const isIndexable = connector.config?.is_indexable as boolean;
+
+ // Initialize with existing options from connector config
+ const existingOptions =
+ (connector.config?.calendar_options as CalendarIndexingOptions | undefined) || DEFAULT_CALENDAR_OPTIONS;
+
+ const [calendarOptions, setCalendarOptions] = useState(existingOptions);
+
+ // Update options when connector config changes
+ useEffect(() => {
+ const options =
+ (connector.config?.calendar_options as CalendarIndexingOptions | undefined) ||
+ DEFAULT_CALENDAR_OPTIONS;
+ setCalendarOptions(options);
+ }, [connector.config]);
+
+ const updateConfig = (options: CalendarIndexingOptions) => {
+ if (onConfigChange) {
+ onConfigChange({
+ ...connector.config,
+ calendar_options: options,
+ });
+ }
+ };
+
+ const handleOptionChange = (key: keyof CalendarIndexingOptions, value: number | boolean) => {
+ const newOptions = { ...calendarOptions, [key]: value };
+ setCalendarOptions(newOptions);
+ updateConfig(newOptions);
+ };
+
+ // Only show configuration if the connector is indexable
+ if (!isIndexable) {
+ return
;
+ }
+
+ return (
+
+ {/* Calendar Indexing Options */}
+
+
+
+
+
Calendar Indexing Options
+
+
+ Configure how events are indexed from your Google Calendar.
+
+
+
+ {/* Max events to index */}
+
+
+
+
+ Max events to index
+
+
+ Maximum number of events to index per sync
+
+
+
+ handleOptionChange("max_events", parseInt(value, 10))
+ }
+ >
+
+
+
+
+
+ 100 events
+
+
+ 250 events
+
+
+ 500 events
+
+
+ 1000 events
+
+
+ 2500 events
+
+
+
+
+
+
+ {/* Days ahead */}
+
+
+
+
+
+
+ Future events range
+
+
+
+ How far ahead to index future events
+
+
+
+ handleOptionChange("days_ahead", parseInt(value, 10))
+ }
+ >
+
+
+
+
+
+ 30 days
+
+
+ 90 days
+
+
+ 180 days
+
+
+ 1 year
+
+
+ 2 years
+
+
+
+
+
+
+ {/* Include recurring events toggle */}
+
+
+
+ Include recurring events
+
+
+ Index individual instances of recurring events
+
+
+
+ handleOptionChange("include_recurring", checked)
+ }
+ />
+
+
+ {/* Include past events toggle */}
+
+
+
+ Include past events
+
+
+ Index events from before the selected date range
+
+
+
+ handleOptionChange("include_past_events", checked)
+ }
+ />
+
+
+
+ );
+};
+
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
deleted file mode 100644
index fdff956e5..000000000
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
+++ /dev/null
@@ -1,353 +0,0 @@
-"use client";
-
-import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation } from "lucide-react";
-import type { FC } from "react";
-import { useEffect, useState } from "react";
-import { ComposioDriveFolderTree } from "@/components/connectors/composio-drive-folder-tree";
-import { Badge } from "@/components/ui/badge";
-import { Button } from "@/components/ui/button";
-import { Label } from "@/components/ui/label";
-import {
- Select,
- SelectContent,
- SelectItem,
- SelectTrigger,
- SelectValue,
-} from "@/components/ui/select";
-import { Switch } from "@/components/ui/switch";
-import type { SearchSourceConnector } from "@/contracts/types/connector.types";
-import { cn } from "@/lib/utils";
-
-interface ComposioConfigProps {
- connector: SearchSourceConnector;
- onConfigChange?: (config: Record) => void;
- onNameChange?: (name: string) => void;
-}
-
-interface SelectedFolder {
- id: string;
- name: string;
-}
-
-interface IndexingOptions {
- max_files_per_folder: number;
- incremental_sync: boolean;
- include_subfolders: boolean;
-}
-
-const DEFAULT_INDEXING_OPTIONS: IndexingOptions = {
- max_files_per_folder: 100,
- incremental_sync: true,
- include_subfolders: true,
-};
-
-// Helper to get appropriate icon for file type based on file name
-function getFileIconFromName(fileName: string, className: string = "size-3.5 shrink-0") {
- const lowerName = fileName.toLowerCase();
- // Spreadsheets
- if (
- lowerName.endsWith(".xlsx") ||
- lowerName.endsWith(".xls") ||
- lowerName.endsWith(".csv") ||
- lowerName.includes("spreadsheet")
- ) {
- return ;
- }
- // Presentations
- if (
- lowerName.endsWith(".pptx") ||
- lowerName.endsWith(".ppt") ||
- lowerName.includes("presentation")
- ) {
- return ;
- }
- // Documents (word, text only - not PDF)
- if (
- lowerName.endsWith(".docx") ||
- lowerName.endsWith(".doc") ||
- lowerName.endsWith(".txt") ||
- lowerName.includes("document") ||
- lowerName.includes("word") ||
- lowerName.includes("text")
- ) {
- return ;
- }
- // Images
- if (
- lowerName.endsWith(".png") ||
- lowerName.endsWith(".jpg") ||
- lowerName.endsWith(".jpeg") ||
- lowerName.endsWith(".gif") ||
- lowerName.endsWith(".webp") ||
- lowerName.endsWith(".svg")
- ) {
- return ;
- }
- // Default (including PDF)
- return ;
-}
-
-export const ComposioConfig: FC = ({ connector, onConfigChange }) => {
- const toolkitId = connector.config?.toolkit_id as string;
- const isIndexable = connector.config?.is_indexable as boolean;
- const composioAccountId = connector.config?.composio_connected_account_id as string;
-
- // Check if this is a Google Drive Composio connector
- const isGoogleDrive = toolkitId === "googledrive";
-
- // Initialize with existing selected folders and files from connector config
- const existingFolders =
- (connector.config?.selected_folders as SelectedFolder[] | undefined) || [];
- const existingFiles = (connector.config?.selected_files as SelectedFolder[] | undefined) || [];
- const existingIndexingOptions =
- (connector.config?.indexing_options as IndexingOptions | undefined) || DEFAULT_INDEXING_OPTIONS;
-
- const [selectedFolders, setSelectedFolders] = useState(existingFolders);
- const [selectedFiles, setSelectedFiles] = useState(existingFiles);
- const [showFolderSelector, setShowFolderSelector] = useState(false);
- const [indexingOptions, setIndexingOptions] = useState(existingIndexingOptions);
-
- // Update selected folders and files when connector config changes
- useEffect(() => {
- const folders = (connector.config?.selected_folders as SelectedFolder[] | undefined) || [];
- const files = (connector.config?.selected_files as SelectedFolder[] | undefined) || [];
- const options =
- (connector.config?.indexing_options as IndexingOptions | undefined) ||
- DEFAULT_INDEXING_OPTIONS;
- setSelectedFolders(folders);
- setSelectedFiles(files);
- setIndexingOptions(options);
- }, [connector.config]);
-
- const updateConfig = (
- folders: SelectedFolder[],
- files: SelectedFolder[],
- options: IndexingOptions
- ) => {
- if (onConfigChange) {
- onConfigChange({
- ...connector.config,
- selected_folders: folders,
- selected_files: files,
- indexing_options: options,
- });
- }
- };
-
- const handleSelectFolders = (folders: SelectedFolder[]) => {
- setSelectedFolders(folders);
- updateConfig(folders, selectedFiles, indexingOptions);
- };
-
- const handleSelectFiles = (files: SelectedFolder[]) => {
- setSelectedFiles(files);
- updateConfig(selectedFolders, files, indexingOptions);
- };
-
- const handleIndexingOptionChange = (key: keyof IndexingOptions, value: number | boolean) => {
- const newOptions = { ...indexingOptions, [key]: value };
- setIndexingOptions(newOptions);
- updateConfig(selectedFolders, selectedFiles, newOptions);
- };
-
- const totalSelected = selectedFolders.length + selectedFiles.length;
-
- return (
-
- {/* Connection Details */}
-
-
- Connection Details
-
-
-
- Toolkit
- {toolkitId}
-
-
- Indexing Supported
-
- {isIndexable ? "Yes" : "Coming Soon"}
-
-
- {composioAccountId && (
-
- Account ID
-
- {composioAccountId}
-
-
- )}
-
-
-
- {/* Google Drive specific: Folder & File Selection */}
- {isGoogleDrive && isIndexable && (
- <>
-
-
-
Folder & File Selection
-
- Select specific folders and/or individual files to index.
-
-
-
- {totalSelected > 0 && (
-
-
- Selected {totalSelected} item{totalSelected > 1 ? "s" : ""}: {(() => {
- const parts: string[] = [];
- if (selectedFolders.length > 0) {
- parts.push(
- `${selectedFolders.length} folder${selectedFolders.length > 1 ? "s" : ""}`
- );
- }
- if (selectedFiles.length > 0) {
- parts.push(
- `${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`
- );
- }
- return parts.length > 0 ? `(${parts.join(" ")})` : "";
- })()}
-
-
- {selectedFolders.map((folder) => (
-
-
- {folder.name}
-
- ))}
- {selectedFiles.map((file) => (
-
- {getFileIconFromName(file.name)}
- {file.name}
-
- ))}
-
-
- )}
-
- {showFolderSelector ? (
-
-
- setShowFolderSelector(false)}
- className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20 hover:bg-slate-400/10 dark:hover:bg-white/10 text-xs sm:text-sm h-8 sm:h-9"
- >
- Done Selecting
-
-
- ) : (
-
setShowFolderSelector(true)}
- className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20 hover:bg-slate-400/10 dark:hover:bg-white/10 text-xs sm:text-sm h-8 sm:h-9"
- >
- {totalSelected > 0 ? "Change Selection" : "Select Folders & Files"}
-
- )}
-
-
- {/* Indexing Options */}
-
-
-
Indexing Options
-
- Configure how files are indexed from your Google Drive.
-
-
-
- {/* Max files per folder */}
-
-
-
-
- Max files per folder
-
-
- Maximum number of files to index from each folder
-
-
-
- handleIndexingOptionChange("max_files_per_folder", parseInt(value, 10))
- }
- >
-
-
-
-
-
- 50 files
-
-
- 100 files
-
-
- 250 files
-
-
- 500 files
-
-
- 1000 files
-
-
-
-
-
-
- {/* Include subfolders toggle */}
-
-
-
- Include subfolders
-
-
- Recursively index files in subfolders of selected folders
-
-
-
- handleIndexingOptionChange("include_subfolders", checked)
- }
- />
-
-
- >
- )}
-
- );
-};
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
new file mode 100644
index 000000000..755b91a5a
--- /dev/null
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
@@ -0,0 +1,313 @@
+"use client";
+
+import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation } from "lucide-react";
+import type { FC } from "react";
+import { useEffect, useState } from "react";
+import { ComposioDriveFolderTree } from "@/components/connectors/composio-drive-folder-tree";
+import { Button } from "@/components/ui/button";
+import { Label } from "@/components/ui/label";
+import {
+ Select,
+ SelectContent,
+ SelectItem,
+ SelectTrigger,
+ SelectValue,
+} from "@/components/ui/select";
+import { Switch } from "@/components/ui/switch";
+import type { SearchSourceConnector } from "@/contracts/types/connector.types";
+
+interface ComposioDriveConfigProps {
+ connector: SearchSourceConnector;
+ onConfigChange?: (config: Record) => void;
+ onNameChange?: (name: string) => void;
+}
+
+interface SelectedFolder {
+ id: string;
+ name: string;
+}
+
+interface IndexingOptions {
+ max_files_per_folder: number;
+ incremental_sync: boolean;
+ include_subfolders: boolean;
+}
+
+const DEFAULT_INDEXING_OPTIONS: IndexingOptions = {
+ max_files_per_folder: 100,
+ incremental_sync: true,
+ include_subfolders: true,
+};
+
+// Helper to get appropriate icon for file type based on file name
+function getFileIconFromName(fileName: string, className: string = "size-3.5 shrink-0") {
+ const lowerName = fileName.toLowerCase();
+ // Spreadsheets
+ if (
+ lowerName.endsWith(".xlsx") ||
+ lowerName.endsWith(".xls") ||
+ lowerName.endsWith(".csv") ||
+ lowerName.includes("spreadsheet")
+ ) {
+ return ;
+ }
+ // Presentations
+ if (
+ lowerName.endsWith(".pptx") ||
+ lowerName.endsWith(".ppt") ||
+ lowerName.includes("presentation")
+ ) {
+ return ;
+ }
+ // Documents (word, text only - not PDF)
+ if (
+ lowerName.endsWith(".docx") ||
+ lowerName.endsWith(".doc") ||
+ lowerName.endsWith(".txt") ||
+ lowerName.includes("document") ||
+ lowerName.includes("word") ||
+ lowerName.includes("text")
+ ) {
+ return ;
+ }
+ // Images
+ if (
+ lowerName.endsWith(".png") ||
+ lowerName.endsWith(".jpg") ||
+ lowerName.endsWith(".jpeg") ||
+ lowerName.endsWith(".gif") ||
+ lowerName.endsWith(".webp") ||
+ lowerName.endsWith(".svg")
+ ) {
+ return ;
+ }
+ // Default (including PDF)
+ return ;
+}
+
+export const ComposioDriveConfig: FC = ({ connector, onConfigChange }) => {
+ const isIndexable = connector.config?.is_indexable as boolean;
+
+ // Initialize with existing selected folders and files from connector config
+ const existingFolders =
+ (connector.config?.selected_folders as SelectedFolder[] | undefined) || [];
+ const existingFiles = (connector.config?.selected_files as SelectedFolder[] | undefined) || [];
+ const existingIndexingOptions =
+ (connector.config?.indexing_options as IndexingOptions | undefined) || DEFAULT_INDEXING_OPTIONS;
+
+ const [selectedFolders, setSelectedFolders] = useState(existingFolders);
+ const [selectedFiles, setSelectedFiles] = useState(existingFiles);
+ const [showFolderSelector, setShowFolderSelector] = useState(false);
+ const [indexingOptions, setIndexingOptions] = useState(existingIndexingOptions);
+
+ // Update selected folders and files when connector config changes
+ useEffect(() => {
+ const folders = (connector.config?.selected_folders as SelectedFolder[] | undefined) || [];
+ const files = (connector.config?.selected_files as SelectedFolder[] | undefined) || [];
+ const options =
+ (connector.config?.indexing_options as IndexingOptions | undefined) ||
+ DEFAULT_INDEXING_OPTIONS;
+ setSelectedFolders(folders);
+ setSelectedFiles(files);
+ setIndexingOptions(options);
+ }, [connector.config]);
+
+ const updateConfig = (
+ folders: SelectedFolder[],
+ files: SelectedFolder[],
+ options: IndexingOptions
+ ) => {
+ if (onConfigChange) {
+ onConfigChange({
+ ...connector.config,
+ selected_folders: folders,
+ selected_files: files,
+ indexing_options: options,
+ });
+ }
+ };
+
+ const handleSelectFolders = (folders: SelectedFolder[]) => {
+ setSelectedFolders(folders);
+ updateConfig(folders, selectedFiles, indexingOptions);
+ };
+
+ const handleSelectFiles = (files: SelectedFolder[]) => {
+ setSelectedFiles(files);
+ updateConfig(selectedFolders, files, indexingOptions);
+ };
+
+ const handleIndexingOptionChange = (key: keyof IndexingOptions, value: number | boolean) => {
+ const newOptions = { ...indexingOptions, [key]: value };
+ setIndexingOptions(newOptions);
+ updateConfig(selectedFolders, selectedFiles, newOptions);
+ };
+
+ const totalSelected = selectedFolders.length + selectedFiles.length;
+
+ // Only show configuration if the connector is indexable
+ if (!isIndexable) {
+ return
;
+ }
+
+ return (
+
+ {/* Folder & File Selection */}
+
+
+
Folder & File Selection
+
+ Select specific folders and/or individual files to index from your Google Drive.
+
+
+
+ {totalSelected > 0 && (
+
+
+ Selected {totalSelected} item{totalSelected > 1 ? "s" : ""}: {(() => {
+ const parts: string[] = [];
+ if (selectedFolders.length > 0) {
+ parts.push(
+ `${selectedFolders.length} folder${selectedFolders.length > 1 ? "s" : ""}`
+ );
+ }
+ if (selectedFiles.length > 0) {
+ parts.push(
+ `${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`
+ );
+ }
+ return parts.length > 0 ? `(${parts.join(" ")})` : "";
+ })()}
+
+
+ {selectedFolders.map((folder) => (
+
+
+ {folder.name}
+
+ ))}
+ {selectedFiles.map((file) => (
+
+ {getFileIconFromName(file.name)}
+ {file.name}
+
+ ))}
+
+
+ )}
+
+ {showFolderSelector ? (
+
+
+ setShowFolderSelector(false)}
+ className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20 hover:bg-slate-400/10 dark:hover:bg-white/10 text-xs sm:text-sm h-8 sm:h-9"
+ >
+ Done Selecting
+
+
+ ) : (
+
setShowFolderSelector(true)}
+ className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20 hover:bg-slate-400/10 dark:hover:bg-white/10 text-xs sm:text-sm h-8 sm:h-9"
+ >
+ {totalSelected > 0 ? "Change Selection" : "Select Folders & Files"}
+
+ )}
+
+
+ {/* Indexing Options */}
+
+
+
Indexing Options
+
+ Configure how files are indexed from your Google Drive.
+
+
+
+ {/* Max files per folder */}
+
+
+
+
+ Max files per folder
+
+
+ Maximum number of files to index from each folder
+
+
+
+ handleIndexingOptionChange("max_files_per_folder", parseInt(value, 10))
+ }
+ >
+
+
+
+
+
+ 50 files
+
+
+ 100 files
+
+
+ 250 files
+
+
+ 500 files
+
+
+ 1000 files
+
+
+
+
+
+
+ {/* Include subfolders toggle */}
+
+
+
+ Include subfolders
+
+
+ Recursively index files in subfolders of selected folders
+
+
+
+ handleIndexingOptionChange("include_subfolders", checked)
+ }
+ />
+
+
+
+ );
+};
+
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
new file mode 100644
index 000000000..963753ab3
--- /dev/null
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
@@ -0,0 +1,174 @@
+"use client";
+
+import { Mail, Tag } from "lucide-react";
+import type { FC } from "react";
+import { useEffect, useState } from "react";
+import { Input } from "@/components/ui/input";
+import { Label } from "@/components/ui/label";
+import {
+ Select,
+ SelectContent,
+ SelectItem,
+ SelectTrigger,
+ SelectValue,
+} from "@/components/ui/select";
+import type { SearchSourceConnector } from "@/contracts/types/connector.types";
+
+interface ComposioGmailConfigProps {
+ connector: SearchSourceConnector;
+ onConfigChange?: (config: Record) => void;
+ onNameChange?: (name: string) => void;
+}
+
+interface GmailIndexingOptions {
+ max_emails: number;
+ label_filter: string;
+ search_query: string;
+}
+
+const DEFAULT_GMAIL_OPTIONS: GmailIndexingOptions = {
+ max_emails: 500,
+ label_filter: "",
+ search_query: "",
+};
+
+export const ComposioGmailConfig: FC = ({ connector, onConfigChange }) => {
+ const isIndexable = connector.config?.is_indexable as boolean;
+
+ // Initialize with existing options from connector config
+ const existingOptions =
+ (connector.config?.gmail_options as GmailIndexingOptions | undefined) || DEFAULT_GMAIL_OPTIONS;
+
+ const [gmailOptions, setGmailOptions] = useState(existingOptions);
+
+ // Update options when connector config changes
+ useEffect(() => {
+ const options =
+ (connector.config?.gmail_options as GmailIndexingOptions | undefined) ||
+ DEFAULT_GMAIL_OPTIONS;
+ setGmailOptions(options);
+ }, [connector.config]);
+
+ const updateConfig = (options: GmailIndexingOptions) => {
+ if (onConfigChange) {
+ onConfigChange({
+ ...connector.config,
+ gmail_options: options,
+ });
+ }
+ };
+
+ const handleOptionChange = (key: keyof GmailIndexingOptions, value: number | string) => {
+ const newOptions = { ...gmailOptions, [key]: value };
+ setGmailOptions(newOptions);
+ updateConfig(newOptions);
+ };
+
+ // Only show configuration if the connector is indexable
+ if (!isIndexable) {
+ return
;
+ }
+
+ return (
+
+ {/* Gmail Indexing Options */}
+
+
+
+
+
Gmail Indexing Options
+
+
+ Configure how emails are indexed from your Gmail account.
+
+
+
+ {/* Max emails to index */}
+
+
+
+
+ Max emails to index
+
+
+ Maximum number of emails to index per sync
+
+
+
+ handleOptionChange("max_emails", parseInt(value, 10))
+ }
+ >
+
+
+
+
+
+ 100 emails
+
+
+ 250 emails
+
+
+ 500 emails
+
+
+ 1000 emails
+
+
+ 2500 emails
+
+
+
+
+
+
+ {/* Label filter */}
+
+
+
+
+
+ Label filter (optional)
+
+
+
+ Only index emails with this label (e.g., "INBOX", "IMPORTANT", "work")
+
+
+
handleOptionChange("label_filter", e.target.value)}
+ placeholder="Enter label name..."
+ className="bg-slate-400/5 dark:bg-slate-400/5 border-slate-400/20 text-xs sm:text-sm"
+ />
+
+
+ {/* Search query */}
+
+
+
+ Search query (optional)
+
+
+ Gmail search query to filter emails (e.g., "from:boss@company.com", "has:attachment")
+
+
+
handleOptionChange("search_query", e.target.value)}
+ placeholder="Enter Gmail search query..."
+ className="bg-slate-400/5 dark:bg-slate-400/5 border-slate-400/20 text-xs sm:text-sm"
+ />
+
+
+
+ );
+};
+
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx
index 1a713a5a0..6b4d86b5a 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx
@@ -6,7 +6,9 @@ import { BaiduSearchApiConfig } from "./components/baidu-search-api-config";
import { BookStackConfig } from "./components/bookstack-config";
import { CirclebackConfig } from "./components/circleback-config";
import { ClickUpConfig } from "./components/clickup-config";
-import { ComposioConfig } from "./components/composio-config";
+import { ComposioCalendarConfig } from "./components/composio-calendar-config";
+import { ComposioDriveConfig } from "./components/composio-drive-config";
+import { ComposioGmailConfig } from "./components/composio-gmail-config";
import { ConfluenceConfig } from "./components/confluence-config";
import { DiscordConfig } from "./components/discord-config";
import { ElasticsearchConfig } from "./components/elasticsearch-config";
@@ -78,9 +80,11 @@ export function getConnectorConfigComponent(
case "OBSIDIAN_CONNECTOR":
return ObsidianConfig;
case "COMPOSIO_GOOGLE_DRIVE_CONNECTOR":
+ return ComposioDriveConfig;
case "COMPOSIO_GMAIL_CONNECTOR":
+ return ComposioGmailConfig;
case "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR":
- return ComposioConfig;
+ return ComposioCalendarConfig;
// OAuth connectors (Gmail, Calendar, Airtable, Notion) and others don't need special config UI
default:
return null;
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index fbdffed7a..6b1a8c92b 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -206,8 +206,9 @@ export const ConnectorEditView: FC = ({
{/* Date range selector and periodic sync - only shown for indexable connectors */}
{connector.is_indexable && (
<>
- {/* Date range selector - not shown for Google Drive, Webcrawler, or GitHub (indexes full repo snapshots) */}
+ {/* Date range selector - not shown for Google Drive (regular and Composio), Webcrawler, or GitHub (indexes full repo snapshots) */}
{connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
+ connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
connector.connector_type !== "WEBCRAWLER_CONNECTOR" &&
connector.connector_type !== "GITHUB_CONNECTOR" && (
= ({
onEndDateChange={onEndDateChange}
allowFutureDates={
connector.connector_type === "GOOGLE_CALENDAR_CONNECTOR" ||
+ connector.connector_type === "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" ||
connector.connector_type === "LUMA_CONNECTOR"
}
/>
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
index 68fc688c3..17995fdfa 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
@@ -9,11 +9,7 @@ import { getConnectorTypeDisplay } from "@/lib/connectors/utils";
import { cn } from "@/lib/utils";
import { DateRangeSelector } from "../../components/date-range-selector";
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
-import {
- COMPOSIO_CONNECTORS,
- type IndexingConfigState,
- OAUTH_CONNECTORS,
-} from "../../constants/connector-constants";
+import type { IndexingConfigState } from "../../constants/connector-constants";
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
import { getConnectorConfigComponent } from "../index";
@@ -95,11 +91,6 @@ export const IndexingConfigurationView: FC = ({
};
}, [checkScrollState]);
- // Check both OAUTH_CONNECTORS and COMPOSIO_CONNECTORS
- const authConnector =
- OAUTH_CONNECTORS.find((c) => c.connectorType === connector?.connector_type) ||
- COMPOSIO_CONNECTORS.find((c) => c.connectorType === connector?.connector_type);
-
return (
{/* Fixed Header */}
@@ -158,8 +149,9 @@ export const IndexingConfigurationView: FC
= ({
{/* Date range selector and periodic sync - only shown for indexable connectors */}
{connector?.is_indexable && (
<>
- {/* Date range selector - not shown for Google Drive, Webcrawler, or GitHub (indexes full repo snapshots) */}
+ {/* Date range selector - not shown for Google Drive (regular and Composio), Webcrawler, or GitHub (indexes full repo snapshots) */}
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
+ config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
config.connectorType !== "WEBCRAWLER_CONNECTOR" &&
config.connectorType !== "GITHUB_CONNECTOR" && (
= ({
onEndDateChange={onEndDateChange}
allowFutureDates={
config.connectorType === "GOOGLE_CALENDAR_CONNECTOR" ||
+ config.connectorType === "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" ||
config.connectorType === "LUMA_CONNECTOR"
}
/>
)}
- {/* Periodic sync - not shown for Google Drive */}
- {config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" && (
+ {/* Periodic sync - not shown for Google Drive (regular and Composio) */}
+ {config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
+ config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && (
{
if (
params.success === "true" &&
- params.connector &&
searchSpaceId &&
params.modal === "connectors"
) {
- const oauthConnector = OAUTH_CONNECTORS.find((c) => c.id === params.connector);
- if (oauthConnector) {
- refetchAllConnectors().then((result) => {
- if (!result.data) return;
+ refetchAllConnectors().then((result) => {
+ if (!result.data) return;
- let newConnector: SearchSourceConnector | undefined;
- if (params.connectorId) {
- const connectorId = parseInt(params.connectorId, 10);
- newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId);
- } else {
+ let newConnector: SearchSourceConnector | undefined;
+ let oauthConnector:
+ | (typeof OAUTH_CONNECTORS)[number]
+ | (typeof COMPOSIO_CONNECTORS)[number]
+ | undefined;
+
+ // First, try to find connector by connectorId if provided
+ if (params.connectorId) {
+ const connectorId = parseInt(params.connectorId, 10);
+ newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId);
+
+ // If we found the connector, find the matching OAuth/Composio connector by type
+ if (newConnector) {
+ oauthConnector =
+ OAUTH_CONNECTORS.find(
+ (c) => c.connectorType === newConnector!.connector_type
+ ) ||
+ COMPOSIO_CONNECTORS.find(
+ (c) => c.connectorType === newConnector!.connector_type
+ );
+ }
+ }
+
+ // If we don't have a connector yet, try to find by connector param
+ if (!newConnector && params.connector) {
+ oauthConnector =
+ OAUTH_CONNECTORS.find((c) => c.id === params.connector) ||
+ COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);
+
+ if (oauthConnector) {
newConnector = result.data.find(
- (c: SearchSourceConnector) => c.connector_type === oauthConnector.connectorType
+ (c: SearchSourceConnector) => c.connector_type === oauthConnector!.connectorType
);
}
+ }
- if (newConnector) {
- const connectorValidation = searchSourceConnector.safeParse(newConnector);
- if (connectorValidation.success) {
- // Track connector connected event for OAuth connectors
- trackConnectorConnected(
- Number(searchSpaceId),
- oauthConnector.connectorType,
- newConnector.id
- );
+ if (newConnector && oauthConnector) {
+ const connectorValidation = searchSourceConnector.safeParse(newConnector);
+ if (connectorValidation.success) {
+ // Track connector connected event for OAuth/Composio connectors
+ trackConnectorConnected(
+ Number(searchSpaceId),
+ oauthConnector.connectorType,
+ newConnector.id
+ );
- const config = validateIndexingConfigState({
- connectorType: oauthConnector.connectorType,
- connectorId: newConnector.id,
- connectorTitle: oauthConnector.title,
- });
- setIndexingConfig(config);
- setIndexingConnector(newConnector);
- setIndexingConnectorConfig(newConnector.config);
- setIsOpen(true);
- const url = new URL(window.location.href);
- url.searchParams.delete("success");
- url.searchParams.set("connectorId", newConnector.id.toString());
- url.searchParams.set("view", "configure");
- window.history.replaceState({}, "", url.toString());
- } else {
- console.warn("Invalid connector data after OAuth:", connectorValidation.error);
- toast.error("Failed to validate connector data");
- }
+ const config = validateIndexingConfigState({
+ connectorType: oauthConnector.connectorType,
+ connectorId: newConnector.id,
+ connectorTitle: oauthConnector.title,
+ });
+ setIndexingConfig(config);
+ setIndexingConnector(newConnector);
+ setIndexingConnectorConfig(newConnector.config);
+ setIsOpen(true);
+ const url = new URL(window.location.href);
+ url.searchParams.delete("success");
+ url.searchParams.set("connectorId", newConnector.id.toString());
+ url.searchParams.set("view", "configure");
+ window.history.replaceState({}, "", url.toString());
+ } else {
+ console.warn("Invalid connector data after OAuth:", connectorValidation.error);
+ toast.error("Failed to validate connector data");
}
- });
- }
+ }
+ });
}
} catch (error) {
// Invalid query params - log but don't crash
@@ -863,9 +885,10 @@ export const useConnectorDialog = () => {
async (refreshConnectors: () => void) => {
if (!indexingConfig || !searchSpaceId) return;
- // Validate date range (skip for Google Drive and Webcrawler)
+ // Validate date range (skip for Google Drive, Composio Drive, and Webcrawler)
if (
indexingConfig.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
+ indexingConfig.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
indexingConfig.connectorType !== "WEBCRAWLER_CONNECTOR"
) {
const dateRangeValidation = dateRangeSchema.safeParse({ startDate, endDate });
@@ -910,8 +933,12 @@ export const useConnectorDialog = () => {
});
}
- // Handle Google Drive folder selection
- if (indexingConfig.connectorType === "GOOGLE_DRIVE_CONNECTOR" && indexingConnectorConfig) {
+ // Handle Google Drive folder selection (regular and Composio)
+ if (
+ (indexingConfig.connectorType === "GOOGLE_DRIVE_CONNECTOR" ||
+ indexingConfig.connectorType === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR") &&
+ indexingConnectorConfig
+ ) {
const selectedFolders = indexingConnectorConfig.selected_folders as
| Array<{ id: string; name: string }>
| undefined;
diff --git a/surfsense_web/lib/connectors/utils.ts b/surfsense_web/lib/connectors/utils.ts
index 34721a6aa..0ca1c1ea9 100644
--- a/surfsense_web/lib/connectors/utils.ts
+++ b/surfsense_web/lib/connectors/utils.ts
@@ -16,6 +16,9 @@ export const getConnectorTypeDisplay = (type: string): string => {
GOOGLE_CALENDAR_CONNECTOR: "Google Calendar",
GOOGLE_GMAIL_CONNECTOR: "Google Gmail",
GOOGLE_DRIVE_CONNECTOR: "Google Drive",
+ COMPOSIO_GOOGLE_DRIVE_CONNECTOR: "Google Drive",
+ COMPOSIO_GMAIL_CONNECTOR: "Gmail",
+ COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: "Google Calendar",
AIRTABLE_CONNECTOR: "Airtable",
LUMA_CONNECTOR: "Luma",
ELASTICSEARCH_CONNECTOR: "Elasticsearch",
From 12f45e1bd3a1d9c47b1543caf37558d76dbdec77 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 20:19:04 +0530
Subject: [PATCH 13/51] feat: streamline Composio connector configurations and
enhance UI interactions
- Refactored Composio connector configuration components to improve modularity and maintainability.
- Simplified the ComposioCalendarConfig, ComposioGmailConfig, and ComposioDriveConfig components by removing unnecessary state management and UI elements.
- Added functionality to remove selected folders and files in the Google Drive and Composio Drive configurations, enhancing user experience.
- Updated connector display names for better clarity in the UI.
- Improved the overall structure of the connector edit view for better readability and usability.
---
.../app/routes/composio_routes.py | 7 +-
.../assistant-ui/connector-popup.tsx | 3 +-
.../components/composio-calendar-config.tsx | 209 +-----------------
.../components/composio-drive-config.tsx | 44 +++-
.../components/composio-gmail-config.tsx | 163 +-------------
.../components/google-drive-config.tsx | 44 +++-
.../views/connector-edit-view.tsx | 3 +-
.../tabs/active-connectors-tab.tsx | 5 +-
8 files changed, 88 insertions(+), 390 deletions(-)
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index 5ad2266b7..9e9b59f82 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -344,13 +344,16 @@ async def composio_callback(
try:
# Generate a unique, user-friendly connector name
- connector_name = await generate_unique_connector_name(
+ # Pass just toolkit_name (without "(Composio)") to avoid redundancy
+ base_name = await generate_unique_connector_name(
session,
connector_type,
space_id,
user_id,
- f"{toolkit_name} (Composio)",
+ toolkit_name,
)
+ # Append "(Composio)" suffix for identification
+ connector_name = f"{base_name} (Composio)"
db_connector = SearchSourceConnector(
name=connector_name,
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index a04e2a9fd..1ec8fad73 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -7,7 +7,7 @@ import type { FC } from "react";
import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
import { currentUserAtom } from "@/atoms/user/user-query.atoms";
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
-import { Dialog, DialogContent } from "@/components/ui/dialog";
+import { Dialog, DialogContent, DialogTitle } from "@/components/ui/dialog";
import { Tabs, TabsContent } from "@/components/ui/tabs";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
import { useConnectorsElectric } from "@/hooks/use-connectors-electric";
@@ -185,6 +185,7 @@ export const ConnectorIndicator: FC = () => {
+ Manage Connectors
{/* YouTube Crawler View - shown when adding YouTube videos */}
{isYouTubeView && searchSpaceId ? (
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
index 6e7a06073..ce5133a9d 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
@@ -1,17 +1,6 @@
"use client";
-import { Calendar, Clock } from "lucide-react";
import type { FC } from "react";
-import { useEffect, useState } from "react";
-import { Label } from "@/components/ui/label";
-import {
- Select,
- SelectContent,
- SelectItem,
- SelectTrigger,
- SelectValue,
-} from "@/components/ui/select";
-import { Switch } from "@/components/ui/switch";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
interface ComposioCalendarConfigProps {
@@ -20,201 +9,7 @@ interface ComposioCalendarConfigProps {
onNameChange?: (name: string) => void;
}
-interface CalendarIndexingOptions {
- max_events: number;
- include_recurring: boolean;
- include_past_events: boolean;
- days_ahead: number;
-}
-
-const DEFAULT_CALENDAR_OPTIONS: CalendarIndexingOptions = {
- max_events: 500,
- include_recurring: true,
- include_past_events: true,
- days_ahead: 365,
-};
-
-export const ComposioCalendarConfig: FC = ({ connector, onConfigChange }) => {
- const isIndexable = connector.config?.is_indexable as boolean;
-
- // Initialize with existing options from connector config
- const existingOptions =
- (connector.config?.calendar_options as CalendarIndexingOptions | undefined) || DEFAULT_CALENDAR_OPTIONS;
-
- const [calendarOptions, setCalendarOptions] = useState(existingOptions);
-
- // Update options when connector config changes
- useEffect(() => {
- const options =
- (connector.config?.calendar_options as CalendarIndexingOptions | undefined) ||
- DEFAULT_CALENDAR_OPTIONS;
- setCalendarOptions(options);
- }, [connector.config]);
-
- const updateConfig = (options: CalendarIndexingOptions) => {
- if (onConfigChange) {
- onConfigChange({
- ...connector.config,
- calendar_options: options,
- });
- }
- };
-
- const handleOptionChange = (key: keyof CalendarIndexingOptions, value: number | boolean) => {
- const newOptions = { ...calendarOptions, [key]: value };
- setCalendarOptions(newOptions);
- updateConfig(newOptions);
- };
-
- // Only show configuration if the connector is indexable
- if (!isIndexable) {
- return
;
- }
-
- return (
-
- {/* Calendar Indexing Options */}
-
-
-
-
-
Calendar Indexing Options
-
-
- Configure how events are indexed from your Google Calendar.
-
-
-
- {/* Max events to index */}
-
-
-
-
- Max events to index
-
-
- Maximum number of events to index per sync
-
-
-
- handleOptionChange("max_events", parseInt(value, 10))
- }
- >
-
-
-
-
-
- 100 events
-
-
- 250 events
-
-
- 500 events
-
-
- 1000 events
-
-
- 2500 events
-
-
-
-
-
-
- {/* Days ahead */}
-
-
-
-
-
-
- Future events range
-
-
-
- How far ahead to index future events
-
-
-
- handleOptionChange("days_ahead", parseInt(value, 10))
- }
- >
-
-
-
-
-
- 30 days
-
-
- 90 days
-
-
- 180 days
-
-
- 1 year
-
-
- 2 years
-
-
-
-
-
-
- {/* Include recurring events toggle */}
-
-
-
- Include recurring events
-
-
- Index individual instances of recurring events
-
-
-
- handleOptionChange("include_recurring", checked)
- }
- />
-
-
- {/* Include past events toggle */}
-
-
-
- Include past events
-
-
- Index events from before the selected date range
-
-
-
- handleOptionChange("include_past_events", checked)
- }
- />
-
-
-
- );
+export const ComposioCalendarConfig: FC = () => {
+ return
;
};
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
index 755b91a5a..0ab0869ff 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
@@ -1,6 +1,6 @@
"use client";
-import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation } from "lucide-react";
+import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation, X } from "lucide-react";
import type { FC } from "react";
import { useEffect, useState } from "react";
import { ComposioDriveFolderTree } from "@/components/connectors/composio-drive-folder-tree";
@@ -143,6 +143,18 @@ export const ComposioDriveConfig: FC = ({ connector, o
updateConfig(selectedFolders, selectedFiles, newOptions);
};
+ const handleRemoveFolder = (folderId: string) => {
+ const newFolders = selectedFolders.filter((folder) => folder.id !== folderId);
+ setSelectedFolders(newFolders);
+ updateConfig(newFolders, selectedFiles, indexingOptions);
+ };
+
+ const handleRemoveFile = (fileId: string) => {
+ const newFiles = selectedFiles.filter((file) => file.id !== fileId);
+ setSelectedFiles(newFiles);
+ updateConfig(selectedFolders, newFiles, indexingOptions);
+ };
+
const totalSelected = selectedFolders.length + selectedFiles.length;
// Only show configuration if the connector is indexable
@@ -176,29 +188,45 @@ export const ComposioDriveConfig: FC = ({ connector, o
`${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`
);
}
- return parts.length > 0 ? `(${parts.join(" ")})` : "";
+ return parts.length > 0 ? `(${parts.join(", ")})` : "";
})()}
{selectedFolders.map((folder) => (
-
- {folder.name}
-
+
{folder.name}
+
handleRemoveFolder(folder.id)}
+ className="shrink-0 p-0.5 hover:bg-muted-foreground/20 rounded transition-colors"
+ aria-label={`Remove ${folder.name}`}
+ >
+
+
+
))}
{selectedFiles.map((file) => (
-
{getFileIconFromName(file.name)}
- {file.name}
-
+ {file.name}
+ handleRemoveFile(file.id)}
+ className="shrink-0 p-0.5 hover:bg-muted-foreground/20 rounded transition-colors"
+ aria-label={`Remove ${file.name}`}
+ >
+
+
+
))}
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
index 963753ab3..4664e3e64 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
@@ -1,17 +1,6 @@
"use client";
-import { Mail, Tag } from "lucide-react";
import type { FC } from "react";
-import { useEffect, useState } from "react";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
-import {
- Select,
- SelectContent,
- SelectItem,
- SelectTrigger,
- SelectValue,
-} from "@/components/ui/select";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
interface ComposioGmailConfigProps {
@@ -20,155 +9,7 @@ interface ComposioGmailConfigProps {
onNameChange?: (name: string) => void;
}
-interface GmailIndexingOptions {
- max_emails: number;
- label_filter: string;
- search_query: string;
-}
-
-const DEFAULT_GMAIL_OPTIONS: GmailIndexingOptions = {
- max_emails: 500,
- label_filter: "",
- search_query: "",
-};
-
-export const ComposioGmailConfig: FC = ({ connector, onConfigChange }) => {
- const isIndexable = connector.config?.is_indexable as boolean;
-
- // Initialize with existing options from connector config
- const existingOptions =
- (connector.config?.gmail_options as GmailIndexingOptions | undefined) || DEFAULT_GMAIL_OPTIONS;
-
- const [gmailOptions, setGmailOptions] = useState(existingOptions);
-
- // Update options when connector config changes
- useEffect(() => {
- const options =
- (connector.config?.gmail_options as GmailIndexingOptions | undefined) ||
- DEFAULT_GMAIL_OPTIONS;
- setGmailOptions(options);
- }, [connector.config]);
-
- const updateConfig = (options: GmailIndexingOptions) => {
- if (onConfigChange) {
- onConfigChange({
- ...connector.config,
- gmail_options: options,
- });
- }
- };
-
- const handleOptionChange = (key: keyof GmailIndexingOptions, value: number | string) => {
- const newOptions = { ...gmailOptions, [key]: value };
- setGmailOptions(newOptions);
- updateConfig(newOptions);
- };
-
- // Only show configuration if the connector is indexable
- if (!isIndexable) {
- return
;
- }
-
- return (
-
- {/* Gmail Indexing Options */}
-
-
-
-
-
Gmail Indexing Options
-
-
- Configure how emails are indexed from your Gmail account.
-
-
-
- {/* Max emails to index */}
-
-
-
-
- Max emails to index
-
-
- Maximum number of emails to index per sync
-
-
-
- handleOptionChange("max_emails", parseInt(value, 10))
- }
- >
-
-
-
-
-
- 100 emails
-
-
- 250 emails
-
-
- 500 emails
-
-
- 1000 emails
-
-
- 2500 emails
-
-
-
-
-
-
- {/* Label filter */}
-
-
-
-
-
- Label filter (optional)
-
-
-
- Only index emails with this label (e.g., "INBOX", "IMPORTANT", "work")
-
-
-
handleOptionChange("label_filter", e.target.value)}
- placeholder="Enter label name..."
- className="bg-slate-400/5 dark:bg-slate-400/5 border-slate-400/20 text-xs sm:text-sm"
- />
-
-
- {/* Search query */}
-
-
-
- Search query (optional)
-
-
- Gmail search query to filter emails (e.g., "from:boss@company.com", "has:attachment")
-
-
-
handleOptionChange("search_query", e.target.value)}
- placeholder="Enter Gmail search query..."
- className="bg-slate-400/5 dark:bg-slate-400/5 border-slate-400/20 text-xs sm:text-sm"
- />
-
-
-
- );
+export const ComposioGmailConfig: FC = () => {
+ return
;
};
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx
index 17f4a49a5..b6cfb39ae 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx
@@ -1,6 +1,6 @@
"use client";
-import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation } from "lucide-react";
+import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation, X } from "lucide-react";
import type { FC } from "react";
import { useEffect, useState } from "react";
import { GoogleDriveFolderTree } from "@/components/connectors/google-drive-folder-tree";
@@ -135,6 +135,18 @@ export const GoogleDriveConfig: FC = ({ connector, onConfi
updateConfig(selectedFolders, selectedFiles, newOptions);
};
+ const handleRemoveFolder = (folderId: string) => {
+ const newFolders = selectedFolders.filter((folder) => folder.id !== folderId);
+ setSelectedFolders(newFolders);
+ updateConfig(newFolders, selectedFiles, indexingOptions);
+ };
+
+ const handleRemoveFile = (fileId: string) => {
+ const newFiles = selectedFiles.filter((file) => file.id !== fileId);
+ setSelectedFiles(newFiles);
+ updateConfig(selectedFolders, newFiles, indexingOptions);
+ };
+
const totalSelected = selectedFolders.length + selectedFiles.length;
return (
@@ -161,29 +173,45 @@ export const GoogleDriveConfig: FC = ({ connector, onConfi
if (selectedFiles.length > 0) {
parts.push(`${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`);
}
- return parts.length > 0 ? `(${parts.join(" ")})` : "";
+ return parts.length > 0 ? `(${parts.join(", ")})` : "";
})()}
{selectedFolders.map((folder) => (
-
- {folder.name}
-
+
{folder.name}
+
handleRemoveFolder(folder.id)}
+ className="shrink-0 p-0.5 hover:bg-muted-foreground/20 rounded transition-colors"
+ aria-label={`Remove ${folder.name}`}
+ >
+
+
+
))}
{selectedFiles.map((file) => (
-
{getFileIconFromName(file.name)}
- {file.name}
-
+ {file.name}
+ handleRemoveFile(file.id)}
+ className="shrink-0 p-0.5 hover:bg-muted-foreground/20 rounded transition-colors"
+ aria-label={`Remove ${file.name}`}
+ >
+
+
+
))}
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index 6b1a8c92b..8951336c5 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -9,6 +9,7 @@ import { cn } from "@/lib/utils";
import { DateRangeSelector } from "../../components/date-range-selector";
import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
import { getConnectorConfigComponent } from "../index";
+import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
interface ConnectorEditViewProps {
connector: SearchSourceConnector;
@@ -151,7 +152,7 @@ export const ConnectorEditView: FC = ({
- {connector.name}
+ {getConnectorDisplayName(connector.name)}
Manage your connector settings and sync configuration
diff --git a/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx b/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx
index e45888bb1..2067ca9ad 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx
@@ -15,6 +15,7 @@ import { connectorsApiService } from "@/lib/apis/connectors-api.service";
import { cn } from "@/lib/utils";
import { COMPOSIO_CONNECTORS, OAUTH_CONNECTORS } from "../constants/connector-constants";
import { getDocumentCountForConnector } from "../utils/connector-document-mapping";
+import { getConnectorDisplayName } from "./all-connectors-tab";
interface ActiveConnectorsTabProps {
searchQuery: string;
@@ -263,8 +264,8 @@ export const ActiveConnectorsTab: FC = ({
-
- {connector.name}
+
+ {getConnectorDisplayName(connector.name)}
{isIndexing ? (
From 08f16b43d72edff44bcd4621a43cad79a61ed103 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 20:36:00 +0530
Subject: [PATCH 14/51] feat: enhance Composio connector naming logic and
improve UI focus
- Updated the Composio connector naming logic to dynamically generate user-friendly names based on existing connectors.
- Introduced new utility functions for counting connectors and retrieving base names for specific connector types.
- Enhanced the UI components to improve accessibility and focus management, ensuring a better user experience when interacting with connector dialogs.
---
.../app/routes/composio_routes.py | 27 +++++++++++--------
.../app/utils/connector_naming.py | 3 +++
.../assistant-ui/connector-popup.tsx | 2 +-
surfsense_web/components/ui/dialog.tsx | 2 +-
4 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index 9e9b59f82..14ef9efcf 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -35,7 +35,10 @@ from app.services.composio_service import (
ComposioService,
)
from app.users import current_active_user
-from app.utils.connector_naming import generate_unique_connector_name
+from app.utils.connector_naming import (
+ count_connectors_of_type,
+ get_base_name_for_type,
+)
from app.utils.oauth_security import OAuthStateManager
# Note: We no longer use check_duplicate_connector for Composio connectors because
@@ -343,17 +346,19 @@ async def composio_callback(
)
try:
- # Generate a unique, user-friendly connector name
- # Pass just toolkit_name (without "(Composio)") to avoid redundancy
- base_name = await generate_unique_connector_name(
- session,
- connector_type,
- space_id,
- user_id,
- toolkit_name,
+ # Count existing connectors of this type to determine the number
+ count = await count_connectors_of_type(
+ session, connector_type, space_id, user_id
)
- # Append "(Composio)" suffix for identification
- connector_name = f"{base_name} (Composio)"
+
+ # Generate base name (e.g., "Gmail", "Google Drive")
+ base_name = get_base_name_for_type(connector_type)
+
+ # Format: "Gmail (Composio) 1", "Gmail (Composio) 2", etc.
+ if count == 0:
+ connector_name = f"{base_name} (Composio) 1"
+ else:
+ connector_name = f"{base_name} (Composio) {count + 1}"
db_connector = SearchSourceConnector(
name=connector_name,
diff --git a/surfsense_backend/app/utils/connector_naming.py b/surfsense_backend/app/utils/connector_naming.py
index a2b748a3a..7d3efc001 100644
--- a/surfsense_backend/app/utils/connector_naming.py
+++ b/surfsense_backend/app/utils/connector_naming.py
@@ -28,6 +28,9 @@ BASE_NAME_FOR_TYPE = {
SearchSourceConnectorType.CONFLUENCE_CONNECTOR: "Confluence",
SearchSourceConnectorType.AIRTABLE_CONNECTOR: "Airtable",
SearchSourceConnectorType.MCP_CONNECTOR: "Model Context Protocol (MCP)",
+ SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR: "Gmail",
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: "Google Drive",
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: "Google Calendar",
}
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index 1ec8fad73..e656c06d6 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -184,7 +184,7 @@ export const ConnectorIndicator: FC = () => {
)}
-
+
Manage Connectors
{/* YouTube Crawler View - shown when adding YouTube videos */}
{isYouTubeView && searchSpaceId ? (
diff --git a/surfsense_web/components/ui/dialog.tsx b/surfsense_web/components/ui/dialog.tsx
index d04d76520..f3fa856d3 100644
--- a/surfsense_web/components/ui/dialog.tsx
+++ b/surfsense_web/components/ui/dialog.tsx
@@ -38,7 +38,7 @@ const DialogContent = React.forwardRef<
Date: Fri, 23 Jan 2026 10:48:43 -0500
Subject: [PATCH 15/51] Reworded README.md around LLM compatibility (Based on
discussion with Sid)
---
README.md | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 7f50b924c..0c5f06029 100644
--- a/README.md
+++ b/README.md
@@ -52,8 +52,10 @@ https://github.com/user-attachments/assets/a0a16566-6967-4374-ac51-9b3e07fbecd7
- Interact in Natural Language and get cited answers.
### 📄 **Cited Answers**
- Get Cited answers just like Perplexity.
+### 🧩 **Universal Compatibility**
+- Connect virtually any inference provider via the OpenAI spec and LiteLLM.
### 🔔 **Privacy & Local LLM Support**
-- Works Flawlessly with Ollama local LLMs.
+- Works flawlessly with local LLMs like vLLM and Ollama.
### 🏠 **Self Hostable**
- Open source and easy to deploy locally.
### 👥 **Team Collaboration with RBAC**
@@ -61,6 +63,7 @@ https://github.com/user-attachments/assets/a0a16566-6967-4374-ac51-9b3e07fbecd7
- Invite team members with customizable roles (Owner, Admin, Editor, Viewer)
- Granular permissions for documents, chats, connectors, and settings
- Share knowledge bases securely within your organization
+- Team chats update in real-time, with "Chat about the chat" support in comment threads
### 🎙️ Podcasts
- Blazingly fast podcast generation agent. (Creates a 3-minute podcast in under 20 seconds.)
- Convert your chat conversations into engaging audio content
@@ -237,6 +240,8 @@ Before self-hosting installation, make sure to complete the [prerequisite setup
### **BackEnd**
+- **LiteLLM**: Universal LLM integration supporting 100+ models (OpenAI, Anthropic, Ollama, etc.)
+
- **FastAPI**: Modern, fast web framework for building APIs with Python
- **PostgreSQL with pgvector**: Database with vector search capabilities for similarity searches
@@ -253,8 +258,6 @@ Before self-hosting installation, make sure to complete the [prerequisite setup
- **LangChain**: Framework for developing AI-powered applications.
-- **LiteLLM**: Universal LLM integration supporting 100+ models (OpenAI, Anthropic, Ollama, etc.)
-
- **Rerankers**: Advanced result ranking for improved search relevance
- **Hybrid Search**: Combines vector similarity and full-text search for optimal results using Reciprocal Rank Fusion (RRF)
From d20bb385b5439abc1c1a0dd4e73c275970c68bea Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 23:03:29 +0530
Subject: [PATCH 16/51] feat: enhance date handling and indexing logic across
connectors
- Added normalization for "undefined" strings to None in date parameters to prevent parsing errors.
- Improved date range validation to ensure start_date is strictly before end_date, adjusting end_date if necessary.
- Updated Google Calendar and Composio connector indexing logic to handle duplicate content more effectively, logging warnings for skipped events.
- Enhanced error handling during final commits to manage integrity errors gracefully.
- Refactored date handling in various connector indexers for consistency and reliability.
---
.../app/connectors/google_gmail_connector.py | 7 +++++
.../routes/search_source_connectors_routes.py | 26 ++++++++++++++-----
.../app/tasks/connector_indexers/base.py | 7 +++++
.../google_calendar_indexer.py | 19 ++++++++++++++
.../tasks/connector_indexers/luma_indexer.py | 7 +++++
.../assistant-ui/connector-popup.tsx | 8 +++++-
.../views/connector-edit-view.tsx | 3 +--
.../views/indexing-configuration-view.tsx | 3 +--
.../hooks/use-connector-dialog.ts | 16 ++++++++++--
9 files changed, 83 insertions(+), 13 deletions(-)
diff --git a/surfsense_backend/app/connectors/google_gmail_connector.py b/surfsense_backend/app/connectors/google_gmail_connector.py
index 8c0e4690e..c86a96413 100644
--- a/surfsense_backend/app/connectors/google_gmail_connector.py
+++ b/surfsense_backend/app/connectors/google_gmail_connector.py
@@ -285,6 +285,13 @@ class GoogleGmailConnector:
try:
from datetime import datetime, timedelta
+ # Normalize date values - handle "undefined" strings from frontend
+ # This prevents "time data 'undefined' does not match format" errors
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
+
# Build date query
query_parts = []
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 82f452c61..928327d9a 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -644,20 +644,30 @@ async def index_connector_content(
# Handle different connector types
response_message = ""
- today_str = datetime.now().strftime("%Y-%m-%d")
+ # Use UTC for consistency with last_indexed_at storage
+ today_str = datetime.now(UTC).strftime("%Y-%m-%d")
# Determine the actual date range to use
if start_date is None:
# Use last_indexed_at or default to 365 days ago
if connector.last_indexed_at:
- today = datetime.now().date()
- if connector.last_indexed_at.date() == today:
+ # Convert last_indexed_at to timezone-naive for comparison (like calculate_date_range does)
+ last_indexed_naive = (
+ connector.last_indexed_at.replace(tzinfo=None)
+ if connector.last_indexed_at.tzinfo
+ else connector.last_indexed_at
+ )
+ # Use UTC for "today" to match how last_indexed_at is stored
+ today_utc = datetime.now(UTC).replace(tzinfo=None).date()
+ last_indexed_date = last_indexed_naive.date()
+
+ if last_indexed_date == today_utc:
# If last indexed today, go back 1 day to ensure we don't miss anything
- indexing_from = (today - timedelta(days=1)).strftime("%Y-%m-%d")
+ indexing_from = (today_utc - timedelta(days=1)).strftime("%Y-%m-%d")
else:
- indexing_from = connector.last_indexed_at.strftime("%Y-%m-%d")
+ indexing_from = last_indexed_naive.strftime("%Y-%m-%d")
else:
- indexing_from = (datetime.now() - timedelta(days=365)).strftime(
+ indexing_from = (datetime.now(UTC).replace(tzinfo=None) - timedelta(days=365)).strftime(
"%Y-%m-%d"
)
else:
@@ -666,6 +676,7 @@ async def index_connector_content(
# For calendar connectors, default to today but allow future dates if explicitly provided
if connector.connector_type in [
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
SearchSourceConnectorType.LUMA_CONNECTOR,
]:
# Default to today if no end_date provided (users can manually select future dates)
@@ -977,6 +988,9 @@ async def index_connector_content(
index_composio_connector_task,
)
+ # For Composio Gmail and Calendar, use the same date calculation logic as normal connectors
+ # This ensures consistent behavior and uses last_indexed_at to reduce API calls
+ # (includes special case: if indexed today, go back 1 day to avoid missing data)
logger.info(
f"Triggering Composio connector indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
diff --git a/surfsense_backend/app/tasks/connector_indexers/base.py b/surfsense_backend/app/tasks/connector_indexers/base.py
index b9a99808e..b390937f0 100644
--- a/surfsense_backend/app/tasks/connector_indexers/base.py
+++ b/surfsense_backend/app/tasks/connector_indexers/base.py
@@ -112,6 +112,13 @@ def calculate_date_range(
Returns:
Tuple of (start_date_str, end_date_str)
"""
+ # Normalize "undefined" strings to None (from frontend)
+ # This prevents parsing errors and ensures consistent behavior across all indexers
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
+
if start_date is not None and end_date is not None:
return start_date, end_date
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index 09bb8de4b..7787560fa 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -4,6 +4,8 @@ Google Calendar connector indexer.
from datetime import datetime, timedelta
+import pytz
+from dateutil.parser import isoparse
from google.oauth2.credentials import Credentials
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
@@ -205,6 +207,23 @@ async def index_google_calendar_events(
# Use provided dates (including future dates)
start_date_str = start_date
end_date_str = end_date
+
+ # If start_date and end_date are the same, adjust end_date to be one day later
+ # to ensure valid date range (start_date must be strictly before end_date)
+ if start_date_str == end_date_str:
+ # Parse the date and add one day to ensure valid range
+ dt = isoparse(end_date_str)
+ if dt.tzinfo is None:
+ dt = dt.replace(tzinfo=pytz.UTC)
+ else:
+ dt = dt.astimezone(pytz.UTC)
+ # Add one day to end_date to make it strictly after start_date
+ dt_end = dt + timedelta(days=1)
+ end_date_str = dt_end.strftime("%Y-%m-%d")
+ logger.info(
+ f"Adjusted end_date from {end_date} to {end_date_str} "
+ f"to ensure valid date range (start_date must be strictly before end_date)"
+ )
await task_logger.log_task_progress(
log_entry,
diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
index 91f81ac20..0d7a979be 100644
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@@ -116,6 +116,13 @@ async def index_luma_events(
luma_client = LumaConnector(api_key=api_key)
+ # Handle 'undefined' string from frontend (treat as None)
+ # This prevents "time data 'undefined' does not match format" errors
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
+
# Calculate date range
# For calendar connectors, allow future dates to index upcoming events
if start_date is None or end_date is None:
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index e656c06d6..68a548409 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -259,7 +259,13 @@ export const ConnectorIndicator: FC = () => {
editingConnector.connector_type !== "GOOGLE_DRIVE_CONNECTOR"
? () => {
startIndexing(editingConnector.id);
- handleQuickIndexConnector(editingConnector.id, editingConnector.connector_type, stopIndexing);
+ handleQuickIndexConnector(
+ editingConnector.id,
+ editingConnector.connector_type,
+ stopIndexing,
+ startDate,
+ endDate
+ );
}
: undefined
}
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index 8951336c5..d12264fbd 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -272,8 +272,7 @@ export const ConnectorEditView: FC = ({
Re-indexing runs in the background
- You can continue using SurfSense while we sync your data. Check the Active tab
- to see progress.
+ You can continue using SurfSense while we sync your data. Check inbox for updates.
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
index 17995fdfa..019e6b37f 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
@@ -189,8 +189,7 @@ export const IndexingConfigurationView: FC = ({
Indexing runs in the background
- You can continue using SurfSense while we sync your data. Check the Active tab
- to see progress.
+ You can continue using SurfSense while we sync your data. Check inbox for updates.
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 3e9e1d930..1bcbd4263 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -1400,9 +1400,15 @@ export const useConnectorDialog = () => {
[editingConnector, searchSpaceId, deleteConnector, router, cameFromMCPList]
);
- // Handle quick index (index without date picker, uses backend defaults)
+ // Handle quick index (index with selected date range, or backend defaults if none selected)
const handleQuickIndexConnector = useCallback(
- async (connectorId: number, connectorType?: string, stopIndexing?: (id: number) => void) => {
+ async (
+ connectorId: number,
+ connectorType?: string,
+ stopIndexing?: (id: number) => void,
+ startDate?: Date,
+ endDate?: Date
+ ) => {
if (!searchSpaceId) return;
// Track quick index clicked event
@@ -1411,10 +1417,16 @@ export const useConnectorDialog = () => {
}
try {
+ // Format dates if provided, otherwise pass undefined (backend will use defaults)
+ const startDateStr = startDate ? format(startDate, "yyyy-MM-dd") : undefined;
+ const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
+
await indexConnector({
connector_id: connectorId,
queryParams: {
search_space_id: searchSpaceId,
+ start_date: startDateStr,
+ end_date: endDateStr,
},
});
toast.success("Indexing started", {
From c48ba36fa47ccffb10f68a76231ab017321c5dbe Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 23:36:14 +0530
Subject: [PATCH 17/51] feat: improve indexing logic and duplicate handling in
connectors
- Enhanced Google Calendar and Composio connector indexing to track and log duplicate content, preventing re-indexing of already processed events.
- Implemented robust error handling during final commits to manage integrity errors gracefully, ensuring successful indexing despite potential duplicates.
- Updated notification service to differentiate between actual errors and warnings for duplicate content, improving user feedback.
- Refactored date handling to ensure valid date ranges and adjusted end dates when necessary for better indexing accuracy.
---
.../composio_google_calendar_connector.py | 59 +++++++++++++--
.../routes/search_source_connectors_routes.py | 72 +++++++++++++++----
.../app/services/notification_service.py | 28 ++++++--
.../google_calendar_indexer.py | 49 ++++++++++++-
.../views/connector-edit-view.tsx | 14 ++--
.../hooks/use-connector-dialog.ts | 11 ++-
6 files changed, 198 insertions(+), 35 deletions(-)
diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
index ab8bde53c..3ac235848 100644
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@@ -18,7 +18,10 @@ from app.db import Document, DocumentType
from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
-from app.tasks.connector_indexers.base import calculate_date_range
+from app.tasks.connector_indexers.base import (
+ calculate_date_range,
+ check_duplicate_document_by_hash,
+)
from app.utils.document_converters import (
create_document_chunks,
generate_content_hash,
@@ -256,6 +259,7 @@ async def index_composio_google_calendar(
documents_indexed = 0
documents_skipped = 0
+ duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
for event in events:
try:
@@ -349,7 +353,25 @@ async def index_composio_google_calendar(
logger.info(
f"Committing batch: {documents_indexed} Google Calendar events processed so far"
)
- await session.commit()
+ await session.commit()
+ continue
+
+ # Document doesn't exist by unique_identifier_hash
+ # Check if a document with the same content_hash exists (from standard connector)
+ with session.no_autoflush:
+ duplicate_by_content = await check_duplicate_document_by_hash(
+ session, content_hash
+ )
+
+ if duplicate_by_content:
+ # A document with the same content already exists (likely from standard connector)
+ logger.info(
+ f"Event {summary} already indexed by another connector "
+ f"(existing document ID: {duplicate_by_content.id}, "
+ f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content."
+ )
+ duplicate_content_count += 1
+ documents_skipped += 1
continue
# Create new document
@@ -429,10 +451,28 @@ async def index_composio_google_calendar(
logger.info(
f"Final commit: Total {documents_indexed} Google Calendar events processed"
)
- await session.commit()
- logger.info(
- "Successfully committed all Composio Google Calendar document changes to database"
- )
+ try:
+ await session.commit()
+ logger.info(
+ "Successfully committed all Composio Google Calendar document changes to database"
+ )
+ except Exception as e:
+ # Handle any remaining integrity errors gracefully (race conditions, etc.)
+ if "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower():
+ logger.warning(
+ f"Duplicate content_hash detected during final commit. "
+ f"This may occur if the same event was indexed by multiple connectors. "
+ f"Rolling back and continuing. Error: {e!s}"
+ )
+ await session.rollback()
+ # Don't fail the entire task - some documents may have been successfully indexed
+ else:
+ raise
+
+ # Build warning message if duplicates were found
+ warning_message = None
+ if duplicate_content_count > 0:
+ warning_message = f"{duplicate_content_count} skipped (duplicate)"
await task_logger.log_task_success(
log_entry,
@@ -440,10 +480,15 @@ async def index_composio_google_calendar(
{
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
+ "duplicate_content_count": duplicate_content_count,
},
)
- return documents_indexed, None
+ logger.info(
+ f"Composio Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
+ f"({duplicate_content_count} due to duplicate content from other connectors)"
+ )
+ return documents_indexed, warning_message
except Exception as e:
logger.error(
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 928327d9a..3b98d7d7c 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -22,6 +22,8 @@ import logging
from datetime import UTC, datetime, timedelta
from typing import Any
+import pytz
+from dateutil.parser import isoparse
from fastapi import APIRouter, Body, Depends, HTTPException, Query
from pydantic import BaseModel, Field, ValidationError
from sqlalchemy.exc import IntegrityError
@@ -681,6 +683,22 @@ async def index_connector_content(
]:
# Default to today if no end_date provided (users can manually select future dates)
indexing_to = today_str if end_date is None else end_date
+
+ # If start_date and end_date are the same, adjust end_date to be one day later
+ # to ensure valid date range (start_date must be strictly before end_date)
+ if indexing_from == indexing_to:
+ dt = isoparse(indexing_to)
+ if dt.tzinfo is None:
+ dt = dt.replace(tzinfo=pytz.UTC)
+ else:
+ dt = dt.astimezone(pytz.UTC)
+ # Add one day to end_date to make it strictly after start_date
+ dt_end = dt + timedelta(days=1)
+ indexing_to = dt_end.strftime("%Y-%m-%d")
+ logger.info(
+ f"Adjusted end_date from {end_date} to {indexing_to} "
+ f"to ensure valid date range (start_date must be strictly before end_date)"
+ )
else:
# For non-calendar connectors, cap at today
indexing_to = end_date if end_date else today_str
@@ -1231,20 +1249,48 @@ async def _run_indexing_with_notifications(
else:
# No new documents processed - check if this is an error or just no changes
if error_or_warning:
- # Actual failure
- logger.error(f"Indexing failed: {error_or_warning}")
- if notification:
- # Refresh notification to ensure it's not stale after indexing function commits
- await session.refresh(notification)
- await NotificationService.connector_indexing.notify_indexing_completed(
- session=session,
- notification=notification,
- indexed_count=0,
- error_message=error_or_warning,
+ # Check if this is a duplicate warning (success case) or an actual error
+ # Handle both normal and Composio calendar connectors
+ error_or_warning_lower = str(error_or_warning).lower() if error_or_warning else ""
+ is_duplicate_warning = "skipped (duplicate)" in error_or_warning_lower
+
+ if is_duplicate_warning:
+ # Duplicate warnings are success cases - sync worked, just found duplicates
+ logger.info(
+ f"Indexing completed successfully: {error_or_warning}"
)
- await (
- session.commit()
- ) # Commit to ensure Electric SQL syncs the notification update
+ # Still update timestamp so ElectricSQL syncs and clears "Syncing" UI
+ if update_timestamp_func:
+ await update_timestamp_func(session, connector_id)
+ await session.commit() # Commit timestamp update
+ if notification:
+ # Refresh notification to ensure it's not stale after timestamp update commit
+ await session.refresh(notification)
+ await NotificationService.connector_indexing.notify_indexing_completed(
+ session=session,
+ notification=notification,
+ indexed_count=0,
+ error_message=error_or_warning, # Pass as warning, not error
+ is_warning=True, # Flag to indicate this is a warning, not an error
+ )
+ await (
+ session.commit()
+ ) # Commit to ensure Electric SQL syncs the notification update
+ else:
+ # Actual failure
+ logger.error(f"Indexing failed: {error_or_warning}")
+ if notification:
+ # Refresh notification to ensure it's not stale after indexing function commits
+ await session.refresh(notification)
+ await NotificationService.connector_indexing.notify_indexing_completed(
+ session=session,
+ notification=notification,
+ indexed_count=0,
+ error_message=error_or_warning,
+ )
+ await (
+ session.commit()
+ ) # Commit to ensure Electric SQL syncs the notification update
else:
# Success - just no new documents to index (all skipped/unchanged)
logger.info(
diff --git a/surfsense_backend/app/services/notification_service.py b/surfsense_backend/app/services/notification_service.py
index 836daeb9e..9fcf807e7 100644
--- a/surfsense_backend/app/services/notification_service.py
+++ b/surfsense_backend/app/services/notification_service.py
@@ -335,6 +335,7 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
notification: Notification,
indexed_count: int,
error_message: str | None = None,
+ is_warning: bool = False,
) -> Notification:
"""
Update notification when connector indexing completes.
@@ -343,7 +344,8 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
session: Database session
notification: Notification to update
indexed_count: Total number of items indexed
- error_message: Error message if indexing failed (optional)
+ error_message: Error message if indexing failed, or warning message (optional)
+ is_warning: If True, treat error_message as a warning (success case) rather than an error
Returns:
Updated notification
@@ -352,10 +354,26 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
"connector_name", "Connector"
)
+ # If there's an error message but items were indexed, treat it as a warning (partial success)
+ # If is_warning is True, treat it as success even with 0 items (e.g., duplicates found)
+ # Otherwise, treat it as a failure
if error_message:
- title = f"Failed: {connector_name}"
- message = f"Sync failed: {error_message}"
- status = "failed"
+ if indexed_count > 0:
+ # Partial success with warnings (e.g., duplicate content from other connectors)
+ title = f"Ready: {connector_name}"
+ item_text = "item" if indexed_count == 1 else "items"
+ message = f"Now searchable! {indexed_count} {item_text} synced. Note: {error_message}"
+ status = "completed"
+ elif is_warning:
+ # Warning case (e.g., duplicates found) - treat as success
+ title = f"Ready: {connector_name}"
+ message = f"Sync completed. {error_message}"
+ status = "completed"
+ else:
+ # Complete failure
+ title = f"Failed: {connector_name}"
+ message = f"Sync failed: {error_message}"
+ status = "failed"
else:
title = f"Ready: {connector_name}"
if indexed_count == 0:
@@ -367,7 +385,7 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
metadata_updates = {
"indexed_count": indexed_count,
- "sync_stage": "completed" if not error_message else "failed",
+ "sync_stage": "completed" if (not error_message or is_warning or indexed_count > 0) else "failed",
"error_message": error_message,
}
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index 7787560fa..5bc805549 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -23,6 +23,7 @@ from app.utils.document_converters import (
from .base import (
check_document_by_unique_identifier,
+ check_duplicate_document_by_hash,
get_connector_by_id,
get_current_timestamp,
logger,
@@ -289,6 +290,7 @@ async def index_google_calendar_events(
documents_indexed = 0
documents_skipped = 0
skipped_events = []
+ duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
for event in events:
try:
@@ -409,6 +411,27 @@ async def index_google_calendar_events(
)
continue
+ # Document doesn't exist by unique_identifier_hash
+ # Check if a document with the same content_hash exists (from another connector)
+ with session.no_autoflush:
+ duplicate_by_content = await check_duplicate_document_by_hash(
+ session, content_hash
+ )
+
+ if duplicate_by_content:
+ # A document with the same content already exists (likely from Composio connector)
+ logger.info(
+ f"Event {event_summary} already indexed by another connector "
+ f"(existing document ID: {duplicate_by_content.id}, "
+ f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content."
+ )
+ duplicate_content_count += 1
+ documents_skipped += 1
+ skipped_events.append(
+ f"{event_summary} (already indexed by another connector)"
+ )
+ continue
+
# Document doesn't exist - create new one
# Generate summary with metadata
user_llm = await get_user_long_context_llm(
@@ -501,7 +524,25 @@ async def index_google_calendar_events(
logger.info(
f"Final commit: Total {documents_indexed} Google Calendar events processed"
)
- await session.commit()
+ try:
+ await session.commit()
+ except Exception as e:
+ # Handle any remaining integrity errors gracefully (race conditions, etc.)
+ if "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower():
+ logger.warning(
+ f"Duplicate content_hash detected during final commit. "
+ f"This may occur if the same event was indexed by multiple connectors. "
+ f"Rolling back and continuing. Error: {e!s}"
+ )
+ await session.rollback()
+ # Don't fail the entire task - some documents may have been successfully indexed
+ else:
+ raise
+
+ # Build warning message if duplicates were found
+ warning_message = None
+ if duplicate_content_count > 0:
+ warning_message = f"{duplicate_content_count} skipped (duplicate)"
await task_logger.log_task_success(
log_entry,
@@ -510,14 +551,16 @@ async def index_google_calendar_events(
"events_processed": total_processed,
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
+ "duplicate_content_count": duplicate_content_count,
"skipped_events_count": len(skipped_events),
},
)
logger.info(
- f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped"
+ f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
+ f"({duplicate_content_count} due to duplicate content from other connectors)"
)
- return total_processed, None
+ return total_processed, warning_message
except SQLAlchemyError as db_error:
await session.rollback()
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index d12264fbd..8f58db542 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -100,10 +100,14 @@ export const ConnectorEditView: FC = ({
// Reset local quick indexing state when indexing completes or fails
useEffect(() => {
- if (!isIndexing) {
- setIsQuickIndexing(false);
+ if (!isIndexing && isQuickIndexing) {
+ // Small delay to ensure smooth transition
+ const timer = setTimeout(() => {
+ setIsQuickIndexing(false);
+ }, 100);
+ return () => clearTimeout(timer);
}
- }, [isIndexing]);
+ }, [isIndexing, isQuickIndexing]);
const handleDisconnectClick = () => {
setShowDisconnectConfirm(true);
@@ -119,11 +123,11 @@ export const ConnectorEditView: FC = ({
};
const handleQuickIndex = useCallback(() => {
- if (onQuickIndex) {
+ if (onQuickIndex && !isQuickIndexing && !isIndexing) {
setIsQuickIndexing(true);
onQuickIndex();
}
- }, [onQuickIndex]);
+ }, [onQuickIndex, isQuickIndexing, isIndexing]);
return (
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 1bcbd4263..9a7f15b0c 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -1409,7 +1409,12 @@ export const useConnectorDialog = () => {
startDate?: Date,
endDate?: Date
) => {
- if (!searchSpaceId) return;
+ if (!searchSpaceId) {
+ if (stopIndexing) {
+ stopIndexing(connectorId);
+ }
+ return;
+ }
// Track quick index clicked event
if (connectorType) {
@@ -1437,6 +1442,8 @@ export const useConnectorDialog = () => {
queryClient.invalidateQueries({
queryKey: cacheKeys.logs.summary(Number(searchSpaceId)),
});
+ // Note: Don't call stopIndexing here - let useIndexingConnectors hook
+ // detect when last_indexed_at changes via Electric SQL
} catch (error) {
console.error("Error indexing connector content:", error);
toast.error(error instanceof Error ? error.message : "Failed to start indexing");
@@ -1446,7 +1453,7 @@ export const useConnectorDialog = () => {
}
}
},
- [searchSpaceId, indexConnector]
+ [searchSpaceId, indexConnector, queryClient]
);
// Handle going back from edit view
From a7333853a283e040515188a481a8c8f935861ee6 Mon Sep 17 00:00:00 2001
From: Eric Lammertsma
Date: Fri, 23 Jan 2026 13:14:23 -0500
Subject: [PATCH 18/51] Swapped Inbox and Documents in sidebar
---
.../layout/providers/LayoutDataProvider.tsx | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
index 52dc7196a..1761c74a1 100644
--- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
+++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
@@ -161,12 +161,6 @@ export function LayoutDataProvider({
// Navigation items
const navItems: NavItem[] = useMemo(
() => [
- {
- title: "Documents",
- url: `/dashboard/${searchSpaceId}/documents`,
- icon: SquareLibrary,
- isActive: pathname?.includes("/documents"),
- },
{
title: "Inbox",
url: "#inbox", // Special URL to indicate this is handled differently
@@ -174,6 +168,12 @@ export function LayoutDataProvider({
isActive: isInboxSidebarOpen,
badge: unreadCount > 0 ? (unreadCount > 99 ? "99+" : unreadCount) : undefined,
},
+ {
+ title: "Documents",
+ url: `/dashboard/${searchSpaceId}/documents`,
+ icon: SquareLibrary,
+ isActive: pathname?.includes("/documents"),
+ },
],
[searchSpaceId, pathname, isInboxSidebarOpen, unreadCount]
);
From 417ff58fad6ba8221c1a561a00ec3f44a99a93cc Mon Sep 17 00:00:00 2001
From: Eric Lammertsma
Date: Fri, 23 Jan 2026 13:27:14 -0500
Subject: [PATCH 19/51] Fixed a bug where new chats weren't auto selected when
created This additionally fixes a bug where the New Chat button wasn't
working properly after creating a new chat
---
.../layout/providers/LayoutDataProvider.tsx | 41 ++++++++++++++++---
1 file changed, 35 insertions(+), 6 deletions(-)
diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
index 1761c74a1..37cb468ec 100644
--- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
+++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
@@ -1,12 +1,13 @@
"use client";
import { useQuery, useQueryClient } from "@tanstack/react-query";
-import { useAtomValue } from "jotai";
+import { useAtomValue, useSetAtom } from "jotai";
import { Inbox, LogOut, SquareLibrary, Trash2 } from "lucide-react";
import { useParams, usePathname, useRouter } from "next/navigation";
import { useTranslations } from "next-intl";
import { useTheme } from "next-themes";
-import { useCallback, useMemo, useState } from "react";
+import { useCallback, useEffect, useMemo, useState } from "react";
+import { currentThreadAtom, resetCurrentThreadAtom } from "@/atoms/chat/current-thread.atom";
import { deleteSearchSpaceMutationAtom } from "@/atoms/search-spaces/search-space-mutation.atoms";
import { searchSpacesAtom } from "@/atoms/search-spaces/search-space-query.atoms";
import { currentUserAtom } from "@/atoms/user/user-query.atoms";
@@ -55,11 +56,16 @@ export function LayoutDataProvider({
const { data: user } = useAtomValue(currentUserAtom);
const { data: searchSpacesData, refetch: refetchSearchSpaces } = useAtomValue(searchSpacesAtom);
const { mutateAsync: deleteSearchSpace } = useAtomValue(deleteSearchSpaceMutationAtom);
+ const currentThreadState = useAtomValue(currentThreadAtom);
+ const resetCurrentThread = useSetAtom(resetCurrentThreadAtom);
- // Current IDs from URL
+ // State for handling new chat navigation when router is out of sync
+ const [pendingNewChat, setPendingNewChat] = useState(false);
+
+ // Current IDs from URL, with fallback to atom for replaceState updates
const currentChatId = params?.chat_id
? Number(Array.isArray(params.chat_id) ? params.chat_id[0] : params.chat_id)
- : null;
+ : currentThreadState.id;
// Fetch current search space (for caching purposes)
useQuery({
@@ -111,6 +117,17 @@ export function LayoutDataProvider({
const [isDeletingSearchSpace, setIsDeletingSearchSpace] = useState(false);
const [isLeavingSearchSpace, setIsLeavingSearchSpace] = useState(false);
+ // Effect to complete new chat navigation after router syncs
+ // This runs when handleNewChat detected an out-of-sync state and triggered a sync
+ useEffect(() => {
+ if (pendingNewChat && params?.chat_id) {
+ // Router is now synced (chat_id is in params), complete navigation to new-chat
+ resetCurrentThread();
+ router.push(`/dashboard/${searchSpaceId}/new-chat`);
+ setPendingNewChat(false);
+ }
+ }, [pendingNewChat, params?.chat_id, router, searchSpaceId, resetCurrentThread]);
+
const searchSpaces: SearchSpace[] = useMemo(() => {
if (!searchSpacesData || !Array.isArray(searchSpacesData)) return [];
return searchSpacesData.map((space) => ({
@@ -278,8 +295,20 @@ export function LayoutDataProvider({
);
const handleNewChat = useCallback(() => {
- router.push(`/dashboard/${searchSpaceId}/new-chat`);
- }, [router, searchSpaceId]);
+ // Check if router is out of sync (thread created via replaceState but params don't have chat_id)
+ const isOutOfSync = currentThreadState.id !== null && !params?.chat_id;
+
+ if (isOutOfSync) {
+ // First sync Next.js router by navigating to the current chat's actual URL
+ // This updates the router's internal state to match the browser URL
+ router.replace(`/dashboard/${searchSpaceId}/new-chat/${currentThreadState.id}`);
+ // Set flag to trigger navigation to new-chat after params update
+ setPendingNewChat(true);
+ } else {
+ // Normal navigation - router is in sync
+ router.push(`/dashboard/${searchSpaceId}/new-chat`);
+ }
+ }, [router, searchSpaceId, currentThreadState.id, params?.chat_id]);
const handleChatSelect = useCallback(
(chat: ChatItem) => {
From 6d14b49d3f4fb39994be6ba96bc93af3f1031831 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 01:20:51 +0530
Subject: [PATCH 20/51] feat: enhance indexing state management and inbox count
formatting
- Improved indexing state management by refining the logic for handling notifications, ensuring accurate updates for in-progress, completed, and failed states.
- Introduced a new utility function to format inbox counts, displaying numbers up to 999 and using "k+" for larger counts, enhancing user interface clarity.
- Updated sidebar components to utilize the new inbox count formatting, improving the overall user experience.
---
.../hooks/use-indexing-connectors.ts | 87 ++++++++++---------
.../layout/providers/LayoutDataProvider.tsx | 13 ++-
.../layout/ui/sidebar/InboxSidebar.tsx | 15 +++-
.../layout/ui/sidebar/NavSection.tsx | 4 +-
4 files changed, 75 insertions(+), 44 deletions(-)
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
index e82a8eb29..289da475d 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
@@ -10,8 +10,9 @@ import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types";
*
* This provides a better UX than polling by:
* 1. Setting indexing state immediately when user triggers indexing (optimistic)
- * 2. Clearing indexing state when Electric SQL detects last_indexed_at changed
- * 3. Clearing indexing state when a failed notification is detected
+ * 2. Detecting in_progress notifications from Electric SQL to restore state after remounts
+ * 3. Clearing indexing state when notifications become completed or failed
+ * 4. Clearing indexing state when Electric SQL detects last_indexed_at changed
*
* The actual `last_indexed_at` value comes from Electric SQL/PGlite, not local state.
*/
@@ -28,65 +29,73 @@ export function useIndexingConnectors(
// Detect when last_indexed_at changes (indexing completed) via Electric SQL
useEffect(() => {
const previousValues = previousLastIndexedAtRef.current;
- const newIndexingIds = new Set(indexingConnectorIds);
- let hasChanges = false;
for (const connector of connectors) {
const previousValue = previousValues.get(connector.id);
const currentValue = connector.last_indexed_at;
- // If last_indexed_at changed and connector was in indexing state, clear it
+ // If last_indexed_at changed, clear it from indexing state
if (
previousValue !== undefined && // We've seen this connector before
- previousValue !== currentValue && // Value changed
- indexingConnectorIds.has(connector.id) // It was marked as indexing
+ previousValue !== currentValue // Value changed
) {
- newIndexingIds.delete(connector.id);
- hasChanges = true;
+ // Use functional update to access current state
+ setIndexingConnectorIds((prev) => {
+ if (prev.has(connector.id)) {
+ const next = new Set(prev);
+ next.delete(connector.id);
+ return next;
+ }
+ return prev;
+ });
}
// Update previous value tracking
previousValues.set(connector.id, currentValue);
}
+ }, [connectors]);
- if (hasChanges) {
- setIndexingConnectorIds(newIndexingIds);
- }
- }, [connectors, indexingConnectorIds]);
-
- // Detect failed notifications and stop indexing state
+ // Detect notification status changes and update indexing state accordingly
+ // This restores spinner state after component remounts and handles all status transitions
useEffect(() => {
if (!inboxItems || inboxItems.length === 0) return;
- const newIndexingIds = new Set(indexingConnectorIds);
- let hasChanges = false;
+ setIndexingConnectorIds((prev) => {
+ const newIndexingIds = new Set(prev);
+ let hasChanges = false;
- for (const item of inboxItems) {
- // Only check connector_indexing notifications
- if (item.type !== "connector_indexing") continue;
+ for (const item of inboxItems) {
+ // Only check connector_indexing notifications
+ if (item.type !== "connector_indexing") continue;
- // Check if this notification indicates a failure
- const metadata = isConnectorIndexingMetadata(item.metadata)
- ? item.metadata
- : null;
- if (!metadata) continue;
+ const metadata = isConnectorIndexingMetadata(item.metadata)
+ ? item.metadata
+ : null;
+ if (!metadata) continue;
- // Check if status is "failed" or if there's an error_message
- const isFailed =
- metadata.status === "failed" ||
- (metadata.error_message && metadata.error_message.trim().length > 0);
-
- // If failed and connector is in indexing state, clear it
- if (isFailed && indexingConnectorIds.has(metadata.connector_id)) {
- newIndexingIds.delete(metadata.connector_id);
- hasChanges = true;
+ // If status is "in_progress", add connector to indexing set
+ if (metadata.status === "in_progress") {
+ if (!newIndexingIds.has(metadata.connector_id)) {
+ newIndexingIds.add(metadata.connector_id);
+ hasChanges = true;
+ }
+ }
+ // If status is "completed" or "failed", remove connector from indexing set
+ else if (
+ metadata.status === "completed" ||
+ metadata.status === "failed" ||
+ (metadata.error_message && metadata.error_message.trim().length > 0)
+ ) {
+ if (newIndexingIds.has(metadata.connector_id)) {
+ newIndexingIds.delete(metadata.connector_id);
+ hasChanges = true;
+ }
+ }
}
- }
- if (hasChanges) {
- setIndexingConnectorIds(newIndexingIds);
- }
- }, [inboxItems, indexingConnectorIds]);
+ return hasChanges ? newIndexingIds : prev;
+ });
+ }, [inboxItems]);
// Add a connector to the indexing set (called when indexing starts)
const startIndexing = useCallback((connectorId: number) => {
diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
index 52dc7196a..9e3f55c97 100644
--- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
+++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
@@ -38,6 +38,17 @@ interface LayoutDataProviderProps {
breadcrumb?: React.ReactNode;
}
+/**
+ * Format count for display: shows numbers up to 999, then "1k+", "2k+", etc.
+ */
+function formatInboxCount(count: number): string {
+ if (count <= 999) {
+ return count.toString();
+ }
+ const thousands = Math.floor(count / 1000);
+ return `${thousands}k+`;
+}
+
export function LayoutDataProvider({
searchSpaceId,
children,
@@ -172,7 +183,7 @@ export function LayoutDataProvider({
url: "#inbox", // Special URL to indicate this is handled differently
icon: Inbox,
isActive: isInboxSidebarOpen,
- badge: unreadCount > 0 ? (unreadCount > 99 ? "99+" : unreadCount) : undefined,
+ badge: unreadCount > 0 ? formatInboxCount(unreadCount) : undefined,
},
],
[searchSpaceId, pathname, isInboxSidebarOpen, unreadCount]
diff --git a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
index bb06d6a56..e80c6e62d 100644
--- a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
@@ -70,6 +70,17 @@ function getInitials(name: string | null | undefined, email: string | null | und
return "U";
}
+/**
+ * Format count for display: shows numbers up to 999, then "1k+", "2k+", etc.
+ */
+function formatInboxCount(count: number): string {
+ if (count <= 999) {
+ return count.toString();
+ }
+ const thousands = Math.floor(count / 1000);
+ return `${thousands}k+`;
+}
+
/**
* Get display name for connector type
*/
@@ -732,7 +743,7 @@ export function InboxSidebar({
{t("mentions") || "Mentions"}
- {unreadMentionsCount}
+ {formatInboxCount(unreadMentionsCount)}
@@ -744,7 +755,7 @@ export function InboxSidebar({
{t("status") || "Status"}
- {unreadStatusCount}
+ {formatInboxCount(unreadStatusCount)}
diff --git a/surfsense_web/components/layout/ui/sidebar/NavSection.tsx b/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
index d2d926de8..742a27bbc 100644
--- a/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
@@ -39,7 +39,7 @@ export function NavSection({ items, onItemClick, isCollapsed = false }: NavSecti
>
{item.badge && (
-
+
{item.badge}
)}
@@ -70,7 +70,7 @@ export function NavSection({ items, onItemClick, isCollapsed = false }: NavSecti
{item.title}
{item.badge && (
-
+
{item.badge}
)}
From f4b1192a063e71437bb24340342fcee2a69f6a1f Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 03:51:57 +0530
Subject: [PATCH 21/51] feat: refine indexing success case handling and
notification messaging
- Enhanced the logic for determining success cases during indexing by distinguishing between duplicate warnings and empty results.
- Updated notification messages to provide clearer feedback for empty results, improving user understanding of indexing outcomes.
- Ensured that notifications reflect accurate statuses, maintaining consistency in user feedback during the indexing process.
---
.../app/routes/search_source_connectors_routes.py | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 3b98d7d7c..487a689dc 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -1249,13 +1249,15 @@ async def _run_indexing_with_notifications(
else:
# No new documents processed - check if this is an error or just no changes
if error_or_warning:
- # Check if this is a duplicate warning (success case) or an actual error
+ # Check if this is a duplicate warning or empty result (success cases) or an actual error
# Handle both normal and Composio calendar connectors
error_or_warning_lower = str(error_or_warning).lower() if error_or_warning else ""
is_duplicate_warning = "skipped (duplicate)" in error_or_warning_lower
+ # "No X found" messages are success cases - sync worked, just found nothing in date range
+ is_empty_result = ("no " in error_or_warning_lower and "found" in error_or_warning_lower)
- if is_duplicate_warning:
- # Duplicate warnings are success cases - sync worked, just found duplicates
+ if is_duplicate_warning or is_empty_result:
+ # These are success cases - sync worked, just found nothing new
logger.info(
f"Indexing completed successfully: {error_or_warning}"
)
@@ -1266,11 +1268,13 @@ async def _run_indexing_with_notifications(
if notification:
# Refresh notification to ensure it's not stale after timestamp update commit
await session.refresh(notification)
+ # For empty results, use a cleaner message
+ notification_message = "No new items found in date range" if is_empty_result else error_or_warning
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=0,
- error_message=error_or_warning, # Pass as warning, not error
+ error_message=notification_message, # Pass as warning, not error
is_warning=True, # Flag to indicate this is a warning, not an error
)
await (
From 5cf6fb15ed9c0f875c584ac4af216d279ae9eb36 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 03:59:17 +0530
Subject: [PATCH 22/51] fix: improve error logging for indexing tasks across
multiple connectors
- Updated error handling in the indexing functions for BookStack, Confluence, Google Calendar, Jira, Linear, and Luma connectors to log specific error messages when failures occur.
- Enhanced logging for cases where no pages or events are found, providing clearer informational messages instead of treating them as critical errors.
- Ensured consistent error reporting across all connector indexers, improving debugging and user feedback during indexing operations.
---
.../app/tasks/connector_indexers/bookstack_indexer.py | 4 ++--
.../app/tasks/connector_indexers/confluence_indexer.py | 4 ++--
.../app/tasks/connector_indexers/google_calendar_indexer.py | 4 ++--
.../app/tasks/connector_indexers/jira_indexer.py | 4 ++--
.../app/tasks/connector_indexers/linear_indexer.py | 4 ++--
.../app/tasks/connector_indexers/luma_indexer.py | 4 ++--
6 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
index 2793f78db..a1067255d 100644
--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@@ -136,10 +136,9 @@ async def index_bookstack_pages(
)
if error:
- logger.error(f"Failed to get BookStack pages: {error}")
-
# Don't treat "No pages found" as an error that should stop indexing
if "No pages found" in error:
+ logger.info(f"No BookStack pages found: {error}")
logger.info(
"No pages found is not a critical error, continuing with update"
)
@@ -159,6 +158,7 @@ async def index_bookstack_pages(
)
return 0, None
else:
+ logger.error(f"Failed to get BookStack pages: {error}")
await task_logger.log_task_failure(
log_entry,
f"Failed to get BookStack pages: {error}",
diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
index 7289b0ccd..ddbefafb9 100644
--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@@ -120,10 +120,9 @@ async def index_confluence_pages(
)
if error:
- logger.error(f"Failed to get Confluence pages: {error}")
-
# Don't treat "No pages found" as an error that should stop indexing
if "No pages found" in error:
+ logger.info(f"No Confluence pages found: {error}")
logger.info(
"No pages found is not a critical error, continuing with update"
)
@@ -147,6 +146,7 @@ async def index_confluence_pages(
await confluence_client.close()
return 0, None
else:
+ logger.error(f"Failed to get Confluence pages: {error}")
await task_logger.log_task_failure(
log_entry,
f"Failed to get Confluence pages: {error}",
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index 5bc805549..ef1f821d2 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -243,10 +243,9 @@ async def index_google_calendar_events(
)
if error:
- logger.error(f"Failed to get Google Calendar events: {error}")
-
# Don't treat "No events found" as an error that should stop indexing
if "No events found" in error:
+ logger.info(f"No Google Calendar events found: {error}")
logger.info(
"No events found is not a critical error, continuing with update"
)
@@ -266,6 +265,7 @@ async def index_google_calendar_events(
)
return 0, None
else:
+ logger.error(f"Failed to get Google Calendar events: {error}")
# Check if this is an authentication error that requires re-authentication
error_message = error
error_type = "APIError"
diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
index fdbeb93b0..4851a6466 100644
--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@@ -126,10 +126,9 @@ async def index_jira_issues(
)
if error:
- logger.error(f"Failed to get Jira issues: {error}")
-
# Don't treat "No issues found" as an error that should stop indexing
if "No issues found" in error:
+ logger.info(f"No Jira issues found: {error}")
logger.info(
"No issues found is not a critical error, continuing with update"
)
@@ -149,6 +148,7 @@ async def index_jira_issues(
)
return 0, None
else:
+ logger.error(f"Failed to get Jira issues: {error}")
await task_logger.log_task_failure(
log_entry,
f"Failed to get Jira issues: {error}",
diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
index f1bfd42e8..7d8e0c30e 100644
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@@ -145,10 +145,9 @@ async def index_linear_issues(
)
if error:
- logger.error(f"Failed to get Linear issues: {error}")
-
# Don't treat "No issues found" as an error that should stop indexing
if "No issues found" in error:
+ logger.info(f"No Linear issues found: {error}")
logger.info(
"No issues found is not a critical error, continuing with update"
)
@@ -162,6 +161,7 @@ async def index_linear_issues(
)
return 0, None
else:
+ logger.error(f"Failed to get Linear issues: {error}")
return 0, f"Failed to get Linear issues: {error}"
logger.info(f"Retrieved {len(issues)} issues from Linear API")
diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
index 0d7a979be..ead259a44 100644
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@@ -179,10 +179,9 @@ async def index_luma_events(
)
if error:
- logger.error(f"Failed to get Luma events: {error}")
-
# Don't treat "No events found" as an error that should stop indexing
if "No events found" in error or "no events" in error.lower():
+ logger.info(f"No Luma events found: {error}")
logger.info(
"No events found is not a critical error, continuing with update"
)
@@ -202,6 +201,7 @@ async def index_luma_events(
)
return 0, None
else:
+ logger.error(f"Failed to get Luma events: {error}")
await task_logger.log_task_failure(
log_entry,
f"Failed to get Luma events: {error}",
From 97d7207bd4e76a5c76b1d6ed88a0784ea76f0445 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 04:33:10 +0530
Subject: [PATCH 23/51] fix: update Google Drive indexer to use SQLAlchemy
casting for metadata queries
- Modified the Google Drive indexer to use SQLAlchemy's cast function for querying document metadata, ensuring proper type handling for file IDs.
- Improved the consistency of metadata queries across the indexing functions, enhancing reliability in document retrieval and processing.
---
.../app/tasks/connector_indexers/google_drive_indexer.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
index 48282a1af..af180c36b 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
@@ -578,7 +578,7 @@ async def _check_rename_only_update(
- (True, message): Only filename changed, document was updated
- (False, None): Content changed or new file, needs full processing
"""
- from sqlalchemy import select
+ from sqlalchemy import cast, select, String
from sqlalchemy.orm.attributes import flag_modified
from app.db import Document
@@ -603,7 +603,7 @@ async def _check_rename_only_update(
select(Document).where(
Document.search_space_id == search_space_id,
Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
- Document.document_metadata["google_drive_file_id"].astext == file_id,
+ cast(Document.document_metadata["google_drive_file_id"], String) == file_id,
)
)
existing_document = result.scalar_one_or_none()
@@ -755,7 +755,7 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:
Handles both new (file_id-based) and legacy (filename-based) hash schemes.
"""
- from sqlalchemy import select
+ from sqlalchemy import cast, select, String
from app.db import Document
@@ -774,7 +774,7 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:
select(Document).where(
Document.search_space_id == search_space_id,
Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
- Document.document_metadata["google_drive_file_id"].astext == file_id,
+ cast(Document.document_metadata["google_drive_file_id"], String) == file_id,
)
)
existing_document = result.scalar_one_or_none()
From a5103da3d74fded873e311108b601d8b36740fce Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 04:36:34 +0530
Subject: [PATCH 24/51] chore: ran linting
---
.../connectors/composio_gmail_connector.py | 1 -
.../composio_google_calendar_connector.py | 14 +-
.../composio_google_drive_connector.py | 83 +++++-----
.../connectors/google_calendar_connector.py | 17 ++-
.../app/connectors/google_gmail_connector.py | 17 ++-
.../app/routes/composio_routes.py | 4 +-
.../routes/search_source_connectors_routes.py | 31 ++--
.../app/services/composio_service.py | 142 ++++++++++--------
.../app/services/notification_service.py | 4 +-
.../google_calendar_indexer.py | 21 ++-
.../google_drive_indexer.py | 10 +-
.../google_gmail_indexer.py | 13 +-
.../assistant-ui/connector-popup.tsx | 5 +-
.../components/composio-calendar-config.tsx | 1 -
.../components/composio-drive-config.tsx | 24 +--
.../components/composio-gmail-config.tsx | 1 -
.../components/google-drive-config.tsx | 10 +-
.../views/connector-edit-view.tsx | 3 +-
.../views/indexing-configuration-view.tsx | 17 ++-
.../hooks/use-connector-dialog.ts | 18 +--
.../hooks/use-indexing-connectors.ts | 4 +-
21 files changed, 259 insertions(+), 181 deletions(-)
diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py
index 5a9645a66..953e2e8fc 100644
--- a/surfsense_backend/app/connectors/composio_gmail_connector.py
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@@ -611,4 +611,3 @@ async def index_composio_gmail(
except Exception as e:
logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
return 0, f"Failed to index Gmail via Composio: {e!s}"
-
diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
index 3ac235848..ec5b22b7f 100644
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@@ -259,7 +259,9 @@ async def index_composio_google_calendar(
documents_indexed = 0
documents_skipped = 0
- duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
+ duplicate_content_count = (
+ 0 # Track events skipped due to duplicate content_hash
+ )
for event in events:
try:
@@ -353,7 +355,7 @@ async def index_composio_google_calendar(
logger.info(
f"Committing batch: {documents_indexed} Google Calendar events processed so far"
)
- await session.commit( )
+ await session.commit()
continue
# Document doesn't exist by unique_identifier_hash
@@ -362,7 +364,7 @@ async def index_composio_google_calendar(
duplicate_by_content = await check_duplicate_document_by_hash(
session, content_hash
)
-
+
if duplicate_by_content:
# A document with the same content already exists (likely from standard connector)
logger.info(
@@ -458,7 +460,10 @@ async def index_composio_google_calendar(
)
except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.)
- if "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower():
+ if (
+ "duplicate key value violates unique constraint" in str(e).lower()
+ or "uniqueviolationerror" in str(e).lower()
+ ):
logger.warning(
f"Duplicate content_hash detected during final commit. "
f"This may occur if the same event was indexed by multiple connectors. "
@@ -495,4 +500,3 @@ async def index_composio_google_calendar(
f"Failed to index Google Calendar via Composio: {e!s}", exc_info=True
)
return 0, f"Failed to index Google Calendar via Composio: {e!s}"
-
diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py
index e19436611..e3b988676 100644
--- a/surfsense_backend/app/connectors/composio_google_drive_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py
@@ -453,8 +453,8 @@ async def check_document_by_unique_identifier(
session: AsyncSession, unique_identifier_hash: str
) -> Document | None:
"""Check if a document with the given unique identifier hash already exists."""
- from sqlalchemy.orm import selectinload
from sqlalchemy.future import select
+ from sqlalchemy.orm import selectinload
existing_doc_result = await session.execute(
select(Document)
@@ -517,14 +517,20 @@ async def index_composio_google_drive(
# Route to delta sync or full scan
if use_delta_sync:
- logger.info(f"Using delta sync for Composio Google Drive connector {connector_id}")
+ logger.info(
+ f"Using delta sync for Composio Google Drive connector {connector_id}"
+ )
await task_logger.log_task_progress(
log_entry,
f"Starting delta sync for Google Drive via Composio (connector {connector_id})",
{"stage": "delta_sync", "token": stored_page_token[:20] + "..."},
)
- documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_delta_sync(
+ (
+ documents_indexed,
+ documents_skipped,
+ processing_errors,
+ ) = await _index_composio_drive_delta_sync(
session=session,
composio_connector=composio_connector,
connector_id=connector_id,
@@ -536,7 +542,9 @@ async def index_composio_google_drive(
log_entry=log_entry,
)
else:
- logger.info(f"Using full scan for Composio Google Drive connector {connector_id} (first sync or no token)")
+ logger.info(
+ f"Using full scan for Composio Google Drive connector {connector_id} (first sync or no token)"
+ )
await task_logger.log_task_progress(
log_entry,
f"Fetching Google Drive files via Composio for connector {connector_id}",
@@ -547,7 +555,11 @@ async def index_composio_google_drive(
},
)
- documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_full_scan(
+ (
+ documents_indexed,
+ documents_skipped,
+ processing_errors,
+ ) = await _index_composio_drive_full_scan(
session=session,
composio_connector=composio_connector,
connector_id=connector_id,
@@ -580,9 +592,13 @@ async def index_composio_google_drive(
await update_connector_last_indexed(session, connector, update_last_indexed)
# Final commit
- logger.info(f"Final commit: Total {documents_indexed} Google Drive files processed")
+ logger.info(
+ f"Final commit: Total {documents_indexed} Google Drive files processed"
+ )
await session.commit()
- logger.info("Successfully committed all Composio Google Drive document changes to database")
+ logger.info(
+ "Successfully committed all Composio Google Drive document changes to database"
+ )
# Handle processing errors
error_message = None
@@ -731,7 +747,9 @@ async def _index_composio_drive_delta_sync(
processing_errors.append(error_msg)
documents_skipped += 1
- logger.info(f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped")
+ logger.info(
+ f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped"
+ )
return documents_indexed, documents_skipped, processing_errors
@@ -858,20 +876,18 @@ async def _index_composio_drive_full_scan(
logger.info("No Google Drive files found")
return 0, 0, []
- logger.info(f"Found {len(all_files)} Google Drive files to index via Composio (full scan)")
+ logger.info(
+ f"Found {len(all_files)} Google Drive files to index via Composio (full scan)"
+ )
for file_info in all_files:
try:
# Handle both standard Google API and potential Composio variations
file_id = file_info.get("id", "") or file_info.get("fileId", "")
file_name = (
- file_info.get("name", "")
- or file_info.get("fileName", "")
- or "Untitled"
- )
- mime_type = file_info.get("mimeType", "") or file_info.get(
- "mime_type", ""
+ file_info.get("name", "") or file_info.get("fileName", "") or "Untitled"
)
+ mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
if not file_id:
documents_skipped += 1
@@ -901,7 +917,9 @@ async def _index_composio_drive_full_scan(
# Batch commit every 10 documents
if documents_indexed > 0 and documents_indexed % 10 == 0:
- logger.info(f"Committing batch: {documents_indexed} Google Drive files processed so far")
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Drive files processed so far"
+ )
await session.commit()
except Exception as e:
@@ -910,7 +928,9 @@ async def _index_composio_drive_full_scan(
processing_errors.append(error_msg)
documents_skipped += 1
- logger.info(f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped")
+ logger.info(
+ f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped"
+ )
return documents_indexed, documents_skipped, processing_errors
@@ -948,9 +968,7 @@ async def _process_single_drive_file(
content, content_error = await composio_connector.get_drive_file_content(file_id)
if content_error or not content:
- logger.warning(
- f"Could not get content for file {file_name}: {content_error}"
- )
+ logger.warning(f"Could not get content for file {file_name}: {content_error}")
# Use metadata as content fallback
markdown_content = f"# {file_name}\n\n"
markdown_content += f"**File ID:** {file_id}\n"
@@ -985,9 +1003,7 @@ async def _process_single_drive_file(
return 0, 1, processing_errors # Skipped
# Update existing document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
+ user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
if user_llm:
document_metadata = {
@@ -1003,12 +1019,8 @@ async def _process_single_drive_file(
markdown_content, user_llm, document_metadata
)
else:
- summary_content = (
- f"Google Drive File: {file_name}\n\nType: {mime_type}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
+ summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
+ summary_embedding = config.embedding_model_instance.embed(summary_content)
chunks = await create_document_chunks(markdown_content)
@@ -1030,9 +1042,7 @@ async def _process_single_drive_file(
return 1, 0, processing_errors # Indexed
# Create new document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
+ user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
if user_llm:
document_metadata = {
@@ -1048,12 +1058,8 @@ async def _process_single_drive_file(
markdown_content, user_llm, document_metadata
)
else:
- summary_content = (
- f"Google Drive File: {file_name}\n\nType: {mime_type}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
+ summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
+ summary_embedding = config.embedding_model_instance.embed(summary_content)
chunks = await create_document_chunks(markdown_content)
@@ -1159,4 +1165,3 @@ async def _fetch_folder_files_recursively(
except Exception as e:
logger.error(f"Error in recursive folder fetch: {e!s}")
return all_files
-
diff --git a/surfsense_backend/app/connectors/google_calendar_connector.py b/surfsense_backend/app/connectors/google_calendar_connector.py
index ac60b02a8..d8160cf25 100644
--- a/surfsense_backend/app/connectors/google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/google_calendar_connector.py
@@ -144,7 +144,10 @@ class GoogleCalendarConnector:
except Exception as e:
error_str = str(e)
# Check if this is an invalid_grant error (token expired/revoked)
- if "invalid_grant" in error_str.lower() or "token has been expired or revoked" in error_str.lower():
+ if (
+ "invalid_grant" in error_str.lower()
+ or "token has been expired or revoked" in error_str.lower()
+ ):
raise Exception(
"Google Calendar authentication failed. Please re-authenticate."
) from e
@@ -173,7 +176,11 @@ class GoogleCalendarConnector:
except Exception as e:
error_str = str(e)
# If the error already contains a user-friendly re-authentication message, preserve it
- if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ if (
+ "re-authenticate" in error_str.lower()
+ or "expired or been revoked" in error_str.lower()
+ or "authentication failed" in error_str.lower()
+ ):
raise Exception(error_str) from e
raise Exception(f"Failed to create Google Calendar service: {e!s}") from e
@@ -283,7 +290,11 @@ class GoogleCalendarConnector:
except Exception as e:
error_str = str(e)
# If the error already contains a user-friendly re-authentication message, preserve it
- if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ if (
+ "re-authenticate" in error_str.lower()
+ or "expired or been revoked" in error_str.lower()
+ or "authentication failed" in error_str.lower()
+ ):
return [], error_str
return [], f"Error fetching events: {e!s}"
diff --git a/surfsense_backend/app/connectors/google_gmail_connector.py b/surfsense_backend/app/connectors/google_gmail_connector.py
index c86a96413..7c7262bff 100644
--- a/surfsense_backend/app/connectors/google_gmail_connector.py
+++ b/surfsense_backend/app/connectors/google_gmail_connector.py
@@ -143,7 +143,10 @@ class GoogleGmailConnector:
except Exception as e:
error_str = str(e)
# Check if this is an invalid_grant error (token expired/revoked)
- if "invalid_grant" in error_str.lower() or "token has been expired or revoked" in error_str.lower():
+ if (
+ "invalid_grant" in error_str.lower()
+ or "token has been expired or revoked" in error_str.lower()
+ ):
raise Exception(
"Gmail authentication failed. Please re-authenticate."
) from e
@@ -172,7 +175,11 @@ class GoogleGmailConnector:
except Exception as e:
error_str = str(e)
# If the error already contains a user-friendly re-authentication message, preserve it
- if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ if (
+ "re-authenticate" in error_str.lower()
+ or "expired or been revoked" in error_str.lower()
+ or "authentication failed" in error_str.lower()
+ ):
raise Exception(error_str) from e
raise Exception(f"Failed to create Gmail service: {e!s}") from e
@@ -237,7 +244,11 @@ class GoogleGmailConnector:
except Exception as e:
error_str = str(e)
# If the error already contains a user-friendly re-authentication message, preserve it
- if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ if (
+ "re-authenticate" in error_str.lower()
+ or "expired or been revoked" in error_str.lower()
+ or "authentication failed" in error_str.lower()
+ ):
return [], error_str
return [], f"Error fetching messages list: {e!s}"
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index 14ef9efcf..a28361132 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -350,10 +350,10 @@ async def composio_callback(
count = await count_connectors_of_type(
session, connector_type, space_id, user_id
)
-
+
# Generate base name (e.g., "Gmail", "Google Drive")
base_name = get_base_name_for_type(connector_type)
-
+
# Format: "Gmail (Composio) 1", "Gmail (Composio) 2", etc.
if count == 0:
connector_name = f"{base_name} (Composio) 1"
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 487a689dc..191c6f954 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -662,16 +662,16 @@ async def index_connector_content(
# Use UTC for "today" to match how last_indexed_at is stored
today_utc = datetime.now(UTC).replace(tzinfo=None).date()
last_indexed_date = last_indexed_naive.date()
-
+
if last_indexed_date == today_utc:
# If last indexed today, go back 1 day to ensure we don't miss anything
indexing_from = (today_utc - timedelta(days=1)).strftime("%Y-%m-%d")
else:
indexing_from = last_indexed_naive.strftime("%Y-%m-%d")
else:
- indexing_from = (datetime.now(UTC).replace(tzinfo=None) - timedelta(days=365)).strftime(
- "%Y-%m-%d"
- )
+ indexing_from = (
+ datetime.now(UTC).replace(tzinfo=None) - timedelta(days=365)
+ ).strftime("%Y-%m-%d")
else:
indexing_from = start_date
@@ -683,7 +683,7 @@ async def index_connector_content(
]:
# Default to today if no end_date provided (users can manually select future dates)
indexing_to = today_str if end_date is None else end_date
-
+
# If start_date and end_date are the same, adjust end_date to be one day later
# to ensure valid date range (start_date must be strictly before end_date)
if indexing_from == indexing_to:
@@ -1251,16 +1251,19 @@ async def _run_indexing_with_notifications(
if error_or_warning:
# Check if this is a duplicate warning or empty result (success cases) or an actual error
# Handle both normal and Composio calendar connectors
- error_or_warning_lower = str(error_or_warning).lower() if error_or_warning else ""
+ error_or_warning_lower = (
+ str(error_or_warning).lower() if error_or_warning else ""
+ )
is_duplicate_warning = "skipped (duplicate)" in error_or_warning_lower
# "No X found" messages are success cases - sync worked, just found nothing in date range
- is_empty_result = ("no " in error_or_warning_lower and "found" in error_or_warning_lower)
-
+ is_empty_result = (
+ "no " in error_or_warning_lower
+ and "found" in error_or_warning_lower
+ )
+
if is_duplicate_warning or is_empty_result:
# These are success cases - sync worked, just found nothing new
- logger.info(
- f"Indexing completed successfully: {error_or_warning}"
- )
+ logger.info(f"Indexing completed successfully: {error_or_warning}")
# Still update timestamp so ElectricSQL syncs and clears "Syncing" UI
if update_timestamp_func:
await update_timestamp_func(session, connector_id)
@@ -1269,7 +1272,11 @@ async def _run_indexing_with_notifications(
# Refresh notification to ensure it's not stale after timestamp update commit
await session.refresh(notification)
# For empty results, use a cleaner message
- notification_message = "No new items found in date range" if is_empty_result else error_or_warning
+ notification_message = (
+ "No new items found in date range"
+ if is_empty_result
+ else error_or_warning
+ )
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index 3ea2d1bf2..ad7841a8b 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -81,7 +81,9 @@ class ComposioService:
# Default download directory for files from Composio
DEFAULT_DOWNLOAD_DIR = "/tmp/composio_downloads"
- def __init__(self, api_key: str | None = None, file_download_dir: str | None = None):
+ def __init__(
+ self, api_key: str | None = None, file_download_dir: str | None = None
+ ):
"""
Initialize the Composio service.
@@ -90,18 +92,20 @@ class ComposioService:
file_download_dir: Directory for downloaded files. Defaults to /tmp/composio_downloads.
"""
import os
-
+
self.api_key = api_key or config.COMPOSIO_API_KEY
if not self.api_key:
raise ValueError("COMPOSIO_API_KEY is required but not configured")
-
+
# Set up download directory
self.file_download_dir = file_download_dir or self.DEFAULT_DOWNLOAD_DIR
os.makedirs(self.file_download_dir, exist_ok=True)
-
+
# Initialize Composio client with download directory
# Per docs: file_download_dir configures where files are downloaded
- self.client = Composio(api_key=self.api_key, file_download_dir=self.file_download_dir)
+ self.client = Composio(
+ api_key=self.api_key, file_download_dir=self.file_download_dir
+ )
@staticmethod
def is_enabled() -> bool:
@@ -512,7 +516,7 @@ class ComposioService:
Tuple of (file content bytes, error message).
"""
from pathlib import Path
-
+
try:
result = await self.execute_tool(
connected_account_id=connected_account_id,
@@ -532,35 +536,37 @@ class ComposioService:
# Response structure: {data: {...}, error: ..., successful: ...}
# The actual file info is nested inside data["data"]
file_path = None
-
+
if isinstance(data, dict):
# Handle nested response structure: data contains {data, error, successful}
# The actual file info is in data["data"]
inner_data = data
if "data" in data and isinstance(data["data"], dict):
inner_data = data["data"]
- logger.debug(f"Found nested data structure. Inner keys: {list(inner_data.keys())}")
+ logger.debug(
+ f"Found nested data structure. Inner keys: {list(inner_data.keys())}"
+ )
elif "successful" in data and "data" in data:
# Standard Composio response wrapper
inner_data = data["data"] if data["data"] else data
-
+
# Try documented fields: file_path, downloaded_file_content, path, uri
file_path = (
- inner_data.get("file_path") or
- inner_data.get("downloaded_file_content") or
- inner_data.get("path") or
- inner_data.get("uri")
+ inner_data.get("file_path")
+ or inner_data.get("downloaded_file_content")
+ or inner_data.get("path")
+ or inner_data.get("uri")
)
-
+
# Handle nested dict case where downloaded_file_content contains the path
if isinstance(file_path, dict):
file_path = (
- file_path.get("file_path") or
- file_path.get("downloaded_file_content") or
- file_path.get("path") or
- file_path.get("uri")
+ file_path.get("file_path")
+ or file_path.get("downloaded_file_content")
+ or file_path.get("path")
+ or file_path.get("uri")
)
-
+
# If still no path, check if inner_data itself has the nested structure
if not file_path and isinstance(inner_data, dict):
for key in ["downloaded_file_content", "file_path", "path", "uri"]:
@@ -572,15 +578,17 @@ class ComposioService:
elif isinstance(val, dict):
# One more level of nesting
file_path = (
- val.get("file_path") or
- val.get("downloaded_file_content") or
- val.get("path") or
- val.get("uri")
+ val.get("file_path")
+ or val.get("downloaded_file_content")
+ or val.get("path")
+ or val.get("uri")
)
if file_path:
break
-
- logger.debug(f"Composio response keys: {list(data.keys())}, inner keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}, extracted path: {file_path}")
+
+ logger.debug(
+ f"Composio response keys: {list(data.keys())}, inner keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}, extracted path: {file_path}"
+ )
elif isinstance(data, str):
# Direct string response (could be path or content)
file_path = data
@@ -591,24 +599,31 @@ class ComposioService:
# Read file from the path
if file_path and isinstance(file_path, str):
path_obj = Path(file_path)
-
+
# Check if it's a valid file path (absolute or in .composio directory)
- if path_obj.is_absolute() or '.composio' in str(path_obj):
+ if path_obj.is_absolute() or ".composio" in str(path_obj):
try:
if path_obj.exists():
content = path_obj.read_bytes()
- logger.info(f"Successfully read {len(content)} bytes from Composio file: {file_path}")
+ logger.info(
+ f"Successfully read {len(content)} bytes from Composio file: {file_path}"
+ )
return content, None
else:
- logger.warning(f"File path from Composio does not exist: {file_path}")
+ logger.warning(
+ f"File path from Composio does not exist: {file_path}"
+ )
return None, f"File not found at path: {file_path}"
except Exception as e:
- logger.error(f"Failed to read file from Composio path {file_path}: {e!s}")
+ logger.error(
+ f"Failed to read file from Composio path {file_path}: {e!s}"
+ )
return None, f"Failed to read file: {e!s}"
else:
# Not a file path - might be base64 encoded content
try:
import base64
+
content = base64.b64decode(file_path)
return content, None
except Exception:
@@ -625,8 +640,11 @@ class ComposioService:
f"Inner data keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else type(inner_data).__name__}, "
f"Full inner data: {inner_data}"
)
- return None, f"No file path in Composio response. Keys: {list(data.keys())}, inner: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}"
-
+ return (
+ None,
+ f"No file path in Composio response. Keys: {list(data.keys())}, inner: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}",
+ )
+
return None, f"Unexpected data type from Composio: {type(data).__name__}"
except Exception as e:
@@ -638,14 +656,14 @@ class ComposioService:
) -> tuple[str | None, str | None]:
"""
Get the starting page token for Google Drive change tracking.
-
+
This token represents the current state and is used for future delta syncs.
Per Composio docs: Use GOOGLEDRIVE_GET_CHANGES_START_PAGE_TOKEN to get initial token.
-
+
Args:
connected_account_id: Composio connected account ID.
entity_id: The entity/user ID that owns the connected account.
-
+
Returns:
Tuple of (start_page_token, error message).
"""
@@ -656,27 +674,27 @@ class ComposioService:
params={},
entity_id=entity_id,
)
-
+
if not result.get("success"):
return None, result.get("error", "Unknown error")
-
+
data = result.get("data", {})
# Handle nested response: {data: {startPageToken: ...}, successful: ...}
if isinstance(data, dict):
inner_data = data.get("data", data)
token = (
- inner_data.get("startPageToken") or
- inner_data.get("start_page_token") or
- data.get("startPageToken") or
- data.get("start_page_token")
+ inner_data.get("startPageToken")
+ or inner_data.get("start_page_token")
+ or data.get("startPageToken")
+ or data.get("start_page_token")
)
if token:
logger.info(f"Got Drive start page token: {token}")
return token, None
-
+
logger.warning(f"Could not extract start page token from response: {data}")
return None, "No start page token in response"
-
+
except Exception as e:
logger.error(f"Failed to get Drive start page token: {e!s}")
return None, str(e)
@@ -691,18 +709,18 @@ class ComposioService:
) -> tuple[list[dict[str, Any]], str | None, str | None]:
"""
List changes in Google Drive since the given page token.
-
+
Per Composio docs: GOOGLEDRIVE_LIST_CHANGES tracks modifications to files/folders.
If pageToken is not provided, it auto-fetches the current start page token.
Response includes nextPageToken for pagination and newStartPageToken for future syncs.
-
+
Args:
connected_account_id: Composio connected account ID.
entity_id: The entity/user ID that owns the connected account.
page_token: Page token from previous sync (optional - will auto-fetch if not provided).
page_size: Number of changes per page.
include_removed: Whether to include removed items in the response.
-
+
Returns:
Tuple of (changes list, new_start_page_token, error message).
"""
@@ -713,42 +731,44 @@ class ComposioService:
}
if page_token:
params["pageToken"] = page_token
-
+
result = await self.execute_tool(
connected_account_id=connected_account_id,
tool_name="GOOGLEDRIVE_LIST_CHANGES",
params=params,
entity_id=entity_id,
)
-
+
if not result.get("success"):
return [], None, result.get("error", "Unknown error")
-
+
data = result.get("data", {})
-
+
# Handle nested response structure
changes = []
new_start_token = None
-
+
if isinstance(data, dict):
inner_data = data.get("data", data)
changes = inner_data.get("changes", []) or data.get("changes", [])
-
+
# Get the token for next sync
# newStartPageToken is returned when all changes have been fetched
# nextPageToken is for pagination within the current fetch
new_start_token = (
- inner_data.get("newStartPageToken") or
- inner_data.get("new_start_page_token") or
- inner_data.get("nextPageToken") or
- inner_data.get("next_page_token") or
- data.get("newStartPageToken") or
- data.get("nextPageToken")
+ inner_data.get("newStartPageToken")
+ or inner_data.get("new_start_page_token")
+ or inner_data.get("nextPageToken")
+ or inner_data.get("next_page_token")
+ or data.get("newStartPageToken")
+ or data.get("nextPageToken")
)
-
- logger.info(f"Got {len(changes)} Drive changes, new token: {new_start_token[:20] if new_start_token else 'None'}...")
+
+ logger.info(
+ f"Got {len(changes)} Drive changes, new token: {new_start_token[:20] if new_start_token else 'None'}..."
+ )
return changes, new_start_token, None
-
+
except Exception as e:
logger.error(f"Failed to list Drive changes: {e!s}")
return [], None, str(e)
diff --git a/surfsense_backend/app/services/notification_service.py b/surfsense_backend/app/services/notification_service.py
index 9fcf807e7..04f39d8ef 100644
--- a/surfsense_backend/app/services/notification_service.py
+++ b/surfsense_backend/app/services/notification_service.py
@@ -385,7 +385,9 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
metadata_updates = {
"indexed_count": indexed_count,
- "sync_stage": "completed" if (not error_message or is_warning or indexed_count > 0) else "failed",
+ "sync_stage": "completed"
+ if (not error_message or is_warning or indexed_count > 0)
+ else "failed",
"error_message": error_message,
}
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index ef1f821d2..2365ff984 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -208,7 +208,7 @@ async def index_google_calendar_events(
# Use provided dates (including future dates)
start_date_str = start_date
end_date_str = end_date
-
+
# If start_date and end_date are the same, adjust end_date to be one day later
# to ensure valid date range (start_date must be strictly before end_date)
if start_date_str == end_date_str:
@@ -269,10 +269,14 @@ async def index_google_calendar_events(
# Check if this is an authentication error that requires re-authentication
error_message = error
error_type = "APIError"
- if "re-authenticate" in error.lower() or "expired or been revoked" in error.lower() or "authentication failed" in error.lower():
+ if (
+ "re-authenticate" in error.lower()
+ or "expired or been revoked" in error.lower()
+ or "authentication failed" in error.lower()
+ ):
error_message = "Google Calendar authentication failed. Please re-authenticate."
error_type = "AuthenticationError"
-
+
await task_logger.log_task_failure(
log_entry,
error_message,
@@ -290,7 +294,9 @@ async def index_google_calendar_events(
documents_indexed = 0
documents_skipped = 0
skipped_events = []
- duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
+ duplicate_content_count = (
+ 0 # Track events skipped due to duplicate content_hash
+ )
for event in events:
try:
@@ -417,7 +423,7 @@ async def index_google_calendar_events(
duplicate_by_content = await check_duplicate_document_by_hash(
session, content_hash
)
-
+
if duplicate_by_content:
# A document with the same content already exists (likely from Composio connector)
logger.info(
@@ -528,7 +534,10 @@ async def index_google_calendar_events(
await session.commit()
except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.)
- if "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower():
+ if (
+ "duplicate key value violates unique constraint" in str(e).lower()
+ or "uniqueviolationerror" in str(e).lower()
+ ):
logger.warning(
f"Duplicate content_hash detected during final commit. "
f"This may occur if the same event was indexed by multiple connectors. "
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
index af180c36b..f50e149d3 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
@@ -578,7 +578,7 @@ async def _check_rename_only_update(
- (True, message): Only filename changed, document was updated
- (False, None): Content changed or new file, needs full processing
"""
- from sqlalchemy import cast, select, String
+ from sqlalchemy import String, cast, select
from sqlalchemy.orm.attributes import flag_modified
from app.db import Document
@@ -603,7 +603,8 @@ async def _check_rename_only_update(
select(Document).where(
Document.search_space_id == search_space_id,
Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
- cast(Document.document_metadata["google_drive_file_id"], String) == file_id,
+ cast(Document.document_metadata["google_drive_file_id"], String)
+ == file_id,
)
)
existing_document = result.scalar_one_or_none()
@@ -755,7 +756,7 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:
Handles both new (file_id-based) and legacy (filename-based) hash schemes.
"""
- from sqlalchemy import cast, select, String
+ from sqlalchemy import String, cast, select
from app.db import Document
@@ -774,7 +775,8 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:
select(Document).where(
Document.search_space_id == search_space_id,
Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
- cast(Document.document_metadata["google_drive_file_id"], String) == file_id,
+ cast(Document.document_metadata["google_drive_file_id"], String)
+ == file_id,
)
)
existing_document = result.scalar_one_or_none()
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
index 6a3057437..08d2904d6 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@@ -173,15 +173,16 @@ async def index_google_gmail_messages(
# Check if this is an authentication error that requires re-authentication
error_message = error
error_type = "APIError"
- if "re-authenticate" in error.lower() or "expired or been revoked" in error.lower() or "authentication failed" in error.lower():
+ if (
+ "re-authenticate" in error.lower()
+ or "expired or been revoked" in error.lower()
+ or "authentication failed" in error.lower()
+ ):
error_message = "Gmail authentication failed. Please re-authenticate."
error_type = "AuthenticationError"
-
+
await task_logger.log_task_failure(
- log_entry,
- error_message,
- error,
- {"error_type": error_type}
+ log_entry, error_message, error, {"error_type": error_type}
)
return 0, error_message
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index 68a548409..293d4a243 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -18,7 +18,10 @@ import { ConnectorDialogHeader } from "./connector-popup/components/connector-di
import { ConnectorConnectView } from "./connector-popup/connector-configs/views/connector-connect-view";
import { ConnectorEditView } from "./connector-popup/connector-configs/views/connector-edit-view";
import { IndexingConfigurationView } from "./connector-popup/connector-configs/views/indexing-configuration-view";
-import { COMPOSIO_CONNECTORS, OAUTH_CONNECTORS } from "./connector-popup/constants/connector-constants";
+import {
+ COMPOSIO_CONNECTORS,
+ OAUTH_CONNECTORS,
+} from "./connector-popup/constants/connector-constants";
import { useConnectorDialog } from "./connector-popup/hooks/use-connector-dialog";
import { useIndexingConnectors } from "./connector-popup/hooks/use-indexing-connectors";
import { ActiveConnectorsTab } from "./connector-popup/tabs/active-connectors-tab";
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
index ce5133a9d..6f282d892 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
@@ -12,4 +12,3 @@ interface ComposioCalendarConfigProps {
export const ComposioCalendarConfig: FC = () => {
return
;
};
-
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
index 0ab0869ff..239125565 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
@@ -1,6 +1,14 @@
"use client";
-import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation, X } from "lucide-react";
+import {
+ File,
+ FileSpreadsheet,
+ FileText,
+ FolderClosed,
+ Image,
+ Presentation,
+ X,
+} from "lucide-react";
import type { FC } from "react";
import { useEffect, useState } from "react";
import { ComposioDriveFolderTree } from "@/components/connectors/composio-drive-folder-tree";
@@ -85,7 +93,10 @@ function getFileIconFromName(fileName: string, className: string = "size-3.5 shr
return ;
}
-export const ComposioDriveConfig: FC = ({ connector, onConfigChange }) => {
+export const ComposioDriveConfig: FC = ({
+ connector,
+ onConfigChange,
+}) => {
const isIndexable = connector.config?.is_indexable as boolean;
// Initialize with existing selected folders and files from connector config
@@ -184,9 +195,7 @@ export const ComposioDriveConfig: FC = ({ connector, o
);
}
if (selectedFiles.length > 0) {
- parts.push(
- `${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`
- );
+ parts.push(`${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`);
}
return parts.length > 0 ? `(${parts.join(", ")})` : "";
})()}
@@ -329,13 +338,10 @@ export const ComposioDriveConfig: FC = ({ connector, o
- handleIndexingOptionChange("include_subfolders", checked)
- }
+ onCheckedChange={(checked) => handleIndexingOptionChange("include_subfolders", checked)}
/>
);
};
-
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
index 4664e3e64..494e1362f 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
@@ -12,4 +12,3 @@ interface ComposioGmailConfigProps {
export const ComposioGmailConfig: FC = () => {
return
;
};
-
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx
index b6cfb39ae..383f6ce0e 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx
@@ -1,6 +1,14 @@
"use client";
-import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation, X } from "lucide-react";
+import {
+ File,
+ FileSpreadsheet,
+ FileText,
+ FolderClosed,
+ Image,
+ Presentation,
+ X,
+} from "lucide-react";
import type { FC } from "react";
import { useEffect, useState } from "react";
import { GoogleDriveFolderTree } from "@/components/connectors/google-drive-folder-tree";
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index 8f58db542..5668d398e 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -276,7 +276,8 @@ export const ConnectorEditView: FC = ({
Re-indexing runs in the background
- You can continue using SurfSense while we sync your data. Check inbox for updates.
+ You can continue using SurfSense while we sync your data. Check inbox for
+ updates.
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
index 019e6b37f..684f03252 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
@@ -170,13 +170,13 @@ export const IndexingConfigurationView: FC = ({
{/* Periodic sync - not shown for Google Drive (regular and Composio) */}
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && (
-
- )}
+
+ )}
>
)}
@@ -189,7 +189,8 @@ export const IndexingConfigurationView: FC = ({
Indexing runs in the background
- You can continue using SurfSense while we sync your data. Check inbox for updates.
+ You can continue using SurfSense while we sync your data. Check inbox for
+ updates.
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 9a7f15b0c..639d0f7ed 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -328,11 +328,7 @@ export const useConnectorDialog = () => {
return;
}
- if (
- params.success === "true" &&
- searchSpaceId &&
- params.modal === "connectors"
- ) {
+ if (params.success === "true" && searchSpaceId && params.modal === "connectors") {
refetchAllConnectors().then((result) => {
if (!result.data) return;
@@ -346,16 +342,12 @@ export const useConnectorDialog = () => {
if (params.connectorId) {
const connectorId = parseInt(params.connectorId, 10);
newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId);
-
+
// If we found the connector, find the matching OAuth/Composio connector by type
if (newConnector) {
oauthConnector =
- OAUTH_CONNECTORS.find(
- (c) => c.connectorType === newConnector!.connector_type
- ) ||
- COMPOSIO_CONNECTORS.find(
- (c) => c.connectorType === newConnector!.connector_type
- );
+ OAUTH_CONNECTORS.find((c) => c.connectorType === newConnector!.connector_type) ||
+ COMPOSIO_CONNECTORS.find((c) => c.connectorType === newConnector!.connector_type);
}
}
@@ -364,7 +356,7 @@ export const useConnectorDialog = () => {
oauthConnector =
OAUTH_CONNECTORS.find((c) => c.id === params.connector) ||
COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);
-
+
if (oauthConnector) {
newConnector = result.data.find(
(c: SearchSourceConnector) => c.connector_type === oauthConnector!.connectorType
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
index 289da475d..19741e020 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
@@ -68,9 +68,7 @@ export function useIndexingConnectors(
// Only check connector_indexing notifications
if (item.type !== "connector_indexing") continue;
- const metadata = isConnectorIndexingMetadata(item.metadata)
- ? item.metadata
- : null;
+ const metadata = isConnectorIndexingMetadata(item.metadata) ? item.metadata : null;
if (!metadata) continue;
// If status is "in_progress", add connector to indexing set
From 7ccb52dc76a45348e965369ad36e1e2314e7a0c2 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 13:18:28 +0530
Subject: [PATCH 25/51] feat: add general settings management to search space
settings
- Introduced a new GeneralSettingsManager component for managing search space name and description.
- Updated the settings navigation to include a general section with an icon and description.
- Modified the SettingsPage to default to the general settings section.
- Enhanced the CreateSearchSpaceDialog for improved UI and accessibility.
- Updated English and Chinese translations for new settings labels and descriptions.
---
.../[search_space_id]/settings/page.tsx | 13 +-
.../ui/dialogs/CreateSearchSpaceDialog.tsx | 37 ++--
.../settings/general-settings-manager.tsx | 200 ++++++++++++++++++
surfsense_web/messages/en.json | 14 +-
surfsense_web/messages/zh.json | 14 +-
5 files changed, 258 insertions(+), 20 deletions(-)
create mode 100644 surfsense_web/components/settings/general-settings-manager.tsx
diff --git a/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx
index fb2f49317..6d5787ed9 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx
@@ -10,11 +10,13 @@ import {
MessageSquare,
Settings,
X,
+ FileText,
} from "lucide-react";
import { AnimatePresence, motion } from "motion/react";
import { useParams, useRouter } from "next/navigation";
import { useTranslations } from "next-intl";
import { useCallback, useEffect, useState } from "react";
+import { GeneralSettingsManager } from "@/components/settings/general-settings-manager";
import { LLMRoleManager } from "@/components/settings/llm-role-manager";
import { ModelConfigManager } from "@/components/settings/model-config-manager";
import { PromptConfigManager } from "@/components/settings/prompt-config-manager";
@@ -30,6 +32,12 @@ interface SettingsNavItem {
}
const settingsNavItems: SettingsNavItem[] = [
+ {
+ id: "general",
+ labelKey: "nav_general",
+ descriptionKey: "nav_general_desc",
+ icon: FileText,
+ },
{
id: "models",
labelKey: "nav_agent_configs",
@@ -262,6 +270,9 @@ function SettingsContent({
ease: [0.4, 0, 0.2, 1],
}}
>
+ {activeSection === "general" && (
+
+ )}
{activeSection === "models" && }
{activeSection === "roles" && }
{activeSection === "prompts" && }
@@ -277,7 +288,7 @@ export default function SettingsPage() {
const router = useRouter();
const params = useParams();
const searchSpaceId = Number(params.search_space_id);
- const [activeSection, setActiveSection] = useState("models");
+ const [activeSection, setActiveSection] = useState("general");
const [isSidebarOpen, setIsSidebarOpen] = useState(false);
// Track settings section view
diff --git a/surfsense_web/components/layout/ui/dialogs/CreateSearchSpaceDialog.tsx b/surfsense_web/components/layout/ui/dialogs/CreateSearchSpaceDialog.tsx
index 7e962536f..427cbd31b 100644
--- a/surfsense_web/components/layout/ui/dialogs/CreateSearchSpaceDialog.tsx
+++ b/surfsense_web/components/layout/ui/dialogs/CreateSearchSpaceDialog.tsx
@@ -82,29 +82,31 @@ export function CreateSearchSpaceDialog({ open, onOpenChange }: CreateSearchSpac
return (
-
-
-
-
-
+
+
+
+
+
-
-
{t("create_title")}
-
{t("create_description")}
+
+ {t("create_title")}
+
+ {t("create_description")}
+
diff --git a/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx
index 7dd01d75a..85f907611 100644
--- a/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx
@@ -25,6 +25,7 @@ interface MobileSidebarProps {
onNewChat: () => void;
onChatSelect: (chat: ChatItem) => void;
onChatDelete?: (chat: ChatItem) => void;
+ onChatArchive?: (chat: ChatItem) => void;
onViewAllSharedChats?: () => void;
onViewAllPrivateChats?: () => void;
user: User;
@@ -64,6 +65,7 @@ export function MobileSidebar({
onNewChat,
onChatSelect,
onChatDelete,
+ onChatArchive,
onViewAllSharedChats,
onViewAllPrivateChats,
user,
@@ -141,6 +143,7 @@ export function MobileSidebar({
}}
onChatSelect={handleChatSelect}
onChatDelete={onChatDelete}
+ onChatArchive={onChatArchive}
onViewAllSharedChats={onViewAllSharedChats}
onViewAllPrivateChats={onViewAllPrivateChats}
user={user}
diff --git a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
index f3452749f..d05f21096 100644
--- a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
@@ -27,6 +27,7 @@ interface SidebarProps {
onNewChat: () => void;
onChatSelect: (chat: ChatItem) => void;
onChatDelete?: (chat: ChatItem) => void;
+ onChatArchive?: (chat: ChatItem) => void;
onViewAllSharedChats?: () => void;
onViewAllPrivateChats?: () => void;
user: User;
@@ -52,6 +53,7 @@ export function Sidebar({
onNewChat,
onChatSelect,
onChatDelete,
+ onChatArchive,
onViewAllSharedChats,
onViewAllPrivateChats,
user,
@@ -175,7 +177,9 @@ export function Sidebar({
key={chat.id}
name={chat.name}
isActive={chat.id === activeChatId}
+ archived={chat.archived}
onClick={() => onChatSelect(chat)}
+ onArchive={() => onChatArchive?.(chat)}
onDelete={() => onChatDelete?.(chat)}
/>
))}
@@ -216,7 +220,9 @@ export function Sidebar({
key={chat.id}
name={chat.name}
isActive={chat.id === activeChatId}
+ archived={chat.archived}
onClick={() => onChatSelect(chat)}
+ onArchive={() => onChatArchive?.(chat)}
onDelete={() => onChatDelete?.(chat)}
/>
))}
diff --git a/surfsense_web/components/layout/ui/sidebar/SidebarHeader.tsx b/surfsense_web/components/layout/ui/sidebar/SidebarHeader.tsx
index 5c8c89152..28c359e64 100644
--- a/surfsense_web/components/layout/ui/sidebar/SidebarHeader.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/SidebarHeader.tsx
@@ -1,6 +1,6 @@
"use client";
-import { ChevronsUpDown, ScrollText, Settings, Users } from "lucide-react";
+import { ChevronsUpDown, Logs, Settings, Users } from "lucide-react";
import { useParams, useRouter } from "next/navigation";
import { useTranslations } from "next-intl";
import { Button } from "@/components/ui/button";
@@ -57,7 +57,7 @@ export function SidebarHeader({
{t("manage_members")}
router.push(`/dashboard/${searchSpaceId}/logs`)}>
-
+
{t("logs")}
From bba3cb1cf9a48c525aabcb4b14d8324881fafa6b Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 17:03:10 +0530
Subject: [PATCH 27/51] feat: enhance ElectricProvider and Tooltip components
for improved user experience
- Updated ElectricProvider to check for user authentication via bearer token before rendering, preventing unnecessary loading screens for unauthenticated users.
- Modified TooltipProvider to include a new disableHoverableContent prop, enhancing tooltip behavior and usability.
---
surfsense_web/components/providers/ElectricProvider.tsx | 7 ++++++-
surfsense_web/components/ui/tooltip.tsx | 4 +++-
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/surfsense_web/components/providers/ElectricProvider.tsx b/surfsense_web/components/providers/ElectricProvider.tsx
index e31885973..f187d10c1 100644
--- a/surfsense_web/components/providers/ElectricProvider.tsx
+++ b/surfsense_web/components/providers/ElectricProvider.tsx
@@ -3,6 +3,7 @@
import { useAtomValue } from "jotai";
import { useEffect, useRef, useState } from "react";
import { currentUserAtom } from "@/atoms/user/user-query.atoms";
+import { getBearerToken } from "@/lib/auth-utils";
import {
cleanupElectric,
type ElectricClient,
@@ -105,9 +106,13 @@ export function ElectricProvider({ children }: ElectricProviderProps) {
};
}, [user?.id, isUserLoaded, electricClient]);
+ // Check if user is authenticated first (has bearer token)
+ // This prevents showing loading screen for unauthenticated users on homepage
+ const hasToken = typeof window !== "undefined" && !!getBearerToken();
+
// For non-authenticated pages (like landing page), render immediately with null context
// Also render immediately if user query failed (e.g., token expired)
- if (!isUserLoaded || !user?.id || isUserError) {
+ if (!hasToken || !isUserLoaded || !user?.id || isUserError) {
return {children} ;
}
diff --git a/surfsense_web/components/ui/tooltip.tsx b/surfsense_web/components/ui/tooltip.tsx
index 871ce9ed5..356467a8a 100644
--- a/surfsense_web/components/ui/tooltip.tsx
+++ b/surfsense_web/components/ui/tooltip.tsx
@@ -7,12 +7,14 @@ import { cn } from "@/lib/utils";
function TooltipProvider({
delayDuration = 0,
+ disableHoverableContent = true,
...props
}: React.ComponentProps) {
return (
);
@@ -42,7 +44,7 @@ function TooltipContent({
data-slot="tooltip-content"
sideOffset={sideOffset}
className={cn(
- "bg-black text-white font-medium shadow-xl px-3 py-1.5 dark:bg-zinc-800 dark:text-zinc-50 border-none animate-in fade-in-0 zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-fit rounded-md text-xs text-balance",
+ "bg-black text-white font-medium shadow-xl px-3 py-1.5 dark:bg-zinc-800 dark:text-zinc-50 border-none animate-in fade-in-0 zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-fit rounded-md text-xs text-balance pointer-events-none",
className
)}
{...props}
From 22bd5e0f392db6d91a724c8901779c66c2f61e24 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 19:42:07 +0530
Subject: [PATCH 28/51] feat: implement unified loading screens across various
components
- Introduced a new UnifiedLoadingScreen component for consistent loading indicators in the application.
- Replaced existing loading implementations in LoginPage, AuthCallbackPage, DashboardLayout, and other components with the new unified loading screen.
- Updated translations for loading messages to enhance user experience and clarity.
- Improved loading states in the ElectricProvider and TokenHandler components to utilize the new loading screen, ensuring a cohesive look and feel during loading processes.
---
surfsense_web/app/(home)/login/page.tsx | 37 +++-------
surfsense_web/app/auth/callback/loading.tsx | 19 +++++
surfsense_web/app/auth/callback/page.tsx | 29 ++++----
.../[search_space_id]/client-layout.tsx | 23 ++----
.../new-chat/[[...chat_id]]/page.tsx | 8 ++-
surfsense_web/app/dashboard/layout.tsx | 19 ++---
surfsense_web/app/dashboard/loading.tsx | 21 ++++++
surfsense_web/app/dashboard/page.tsx | 39 ++--------
surfsense_web/components/TokenHandler.tsx | 7 +-
.../new-chat/source-detail-panel.tsx | 15 ++--
.../components/providers/ElectricProvider.tsx | 7 +-
.../components/ui/unified-loading-screen.tsx | 72 +++++++++++++++++++
surfsense_web/messages/en.json | 19 +++--
surfsense_web/messages/zh.json | 17 +++--
14 files changed, 191 insertions(+), 141 deletions(-)
create mode 100644 surfsense_web/app/auth/callback/loading.tsx
create mode 100644 surfsense_web/app/dashboard/loading.tsx
create mode 100644 surfsense_web/components/ui/unified-loading-screen.tsx
diff --git a/surfsense_web/app/(home)/login/page.tsx b/surfsense_web/app/(home)/login/page.tsx
index 7aade8427..0026fdcd0 100644
--- a/surfsense_web/app/(home)/login/page.tsx
+++ b/surfsense_web/app/(home)/login/page.tsx
@@ -1,12 +1,12 @@
"use client";
-import { Loader2 } from "lucide-react";
import { AnimatePresence, motion } from "motion/react";
import { useSearchParams } from "next/navigation";
import { useTranslations } from "next-intl";
import { Suspense, useEffect, useState } from "react";
import { toast } from "sonner";
import { Logo } from "@/components/Logo";
+import { UnifiedLoadingScreen } from "@/components/ui/unified-loading-screen";
import { getAuthErrorDetails, shouldRetry } from "@/lib/auth-errors";
import { AUTH_TYPE } from "@/lib/env-config";
import { AmbientBackground } from "./AmbientBackground";
@@ -59,7 +59,11 @@ function LoginContent() {
});
// Show toast with conditional retry action
- const toastOptions: any = {
+ const toastOptions: {
+ description: string;
+ duration: number;
+ action?: { label: string; onClick: () => void };
+ } = {
description: errorDescription,
duration: 6000,
};
@@ -90,18 +94,7 @@ function LoginContent() {
// Show loading state while determining auth type
if (isLoading) {
- return (
-
-
-
-
-
-
- {tCommon("loading")}
-
-
-
- );
+ return ;
}
if (authType === "GOOGLE") {
@@ -182,23 +175,9 @@ function LoginContent() {
);
}
-// Loading fallback for Suspense
-const LoadingFallback = () => (
-
-);
-
export default function LoginPage() {
return (
- }>
+ }>
);
diff --git a/surfsense_web/app/auth/callback/loading.tsx b/surfsense_web/app/auth/callback/loading.tsx
new file mode 100644
index 000000000..24d1891b4
--- /dev/null
+++ b/surfsense_web/app/auth/callback/loading.tsx
@@ -0,0 +1,19 @@
+import { useTranslations } from "next-intl";
+import { Spinner } from "@/components/ui/spinner";
+
+export default function AuthCallbackLoading() {
+ const t = useTranslations("auth");
+ return (
+
+
+
+
+
+
+ {t("processing_authentication")}
+
+
+
+ );
+}
+
diff --git a/surfsense_web/app/auth/callback/page.tsx b/surfsense_web/app/auth/callback/page.tsx
index da868c316..920581654 100644
--- a/surfsense_web/app/auth/callback/page.tsx
+++ b/surfsense_web/app/auth/callback/page.tsx
@@ -1,23 +1,20 @@
+"use client";
+
import { Suspense } from "react";
+import { useTranslations } from "next-intl";
+import { UnifiedLoadingScreen } from "@/components/ui/unified-loading-screen";
import TokenHandler from "@/components/TokenHandler";
export default function AuthCallbackPage() {
+ const t = useTranslations("auth");
+
return (
-
-
Authentication Callback
-
-
-
- }
- >
-
-
-
+
}>
+
+
);
}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx
index bbafa9703..0c0b0cbc2 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx
@@ -1,7 +1,6 @@
"use client";
import { useAtomValue, useSetAtom } from "jotai";
-import { Loader2 } from "lucide-react";
import { useParams, usePathname, useRouter } from "next/navigation";
import { useTranslations } from "next-intl";
import type React from "react";
@@ -19,6 +18,7 @@ import { DashboardBreadcrumb } from "@/components/dashboard-breadcrumb";
import { LayoutDataProvider } from "@/components/layout";
import { OnboardingTour } from "@/components/onboarding-tour";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { UnifiedLoadingScreen } from "@/components/ui/unified-loading-screen";
export function DashboardClientLayout({
children,
@@ -153,23 +153,10 @@ export function DashboardClientLayout({
isAutoConfiguring
) {
return (
-
-
-
-
- {isAutoConfiguring ? "Setting up AI..." : t("loading_config")}
-
-
- {isAutoConfiguring
- ? "Auto-configuring with available settings"
- : t("checking_llm_prefs")}
-
-
-
-
-
-
-
+
);
}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
index 59e7878c4..4509a44a7 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@@ -9,6 +9,7 @@ import {
import { useQueryClient } from "@tanstack/react-query";
import { useAtomValue, useSetAtom } from "jotai";
import { useParams, useSearchParams } from "next/navigation";
+import { useTranslations } from "next-intl";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { toast } from "sonner";
import { z } from "zod";
@@ -34,6 +35,7 @@ import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast";
import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview";
import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage";
import { RecallMemoryToolUI, SaveMemoryToolUI } from "@/components/tool-ui/user-memory";
+import { Spinner } from "@/components/ui/spinner";
import { useChatSessionStateSync } from "@/hooks/use-chat-session-state";
import { useMessagesElectric } from "@/hooks/use-messages-electric";
// import { WriteTodosToolUI } from "@/components/tool-ui/write-todos";
@@ -236,6 +238,7 @@ interface ThinkingStepData {
}
export default function NewChatPage() {
+ const t = useTranslations("dashboard");
const params = useParams();
const queryClient = useQueryClient();
const [isInitializing, setIsInitializing] = useState(true);
@@ -1475,8 +1478,9 @@ export default function NewChatPage() {
// Show loading state only when loading an existing thread
if (isInitializing) {
return (
-
-
Loading chat...
+
);
}
diff --git a/surfsense_web/app/dashboard/layout.tsx b/surfsense_web/app/dashboard/layout.tsx
index 71cd6275f..8ffef229c 100644
--- a/surfsense_web/app/dashboard/layout.tsx
+++ b/surfsense_web/app/dashboard/layout.tsx
@@ -1,8 +1,8 @@
"use client";
-import { Loader2 } from "lucide-react";
import { useEffect, useState } from "react";
-import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { useTranslations } from "next-intl";
+import { UnifiedLoadingScreen } from "@/components/ui/unified-loading-screen";
import { getBearerToken, redirectToLogin } from "@/lib/auth-utils";
interface DashboardLayoutProps {
@@ -10,6 +10,7 @@ interface DashboardLayoutProps {
}
export default function DashboardLayout({ children }: DashboardLayoutProps) {
+ const t = useTranslations("dashboard");
const [isCheckingAuth, setIsCheckingAuth] = useState(true);
useEffect(() => {
@@ -25,19 +26,7 @@ export default function DashboardLayout({ children }: DashboardLayoutProps) {
// Show loading screen while checking authentication
if (isCheckingAuth) {
- return (
-
-
-
- Loading Dashboard
- Checking authentication...
-
-
-
-
-
-
- );
+ return
;
}
return (
diff --git a/surfsense_web/app/dashboard/loading.tsx b/surfsense_web/app/dashboard/loading.tsx
new file mode 100644
index 000000000..b18c5dd75
--- /dev/null
+++ b/surfsense_web/app/dashboard/loading.tsx
@@ -0,0 +1,21 @@
+"use client";
+
+import { useTranslations } from "next-intl";
+import { Spinner } from "@/components/ui/spinner";
+
+export default function DashboardLoading() {
+ const t = useTranslations("common");
+ return (
+
+
+
+
+
+
+ {t("loading")}
+
+
+
+ );
+}
+
diff --git a/surfsense_web/app/dashboard/page.tsx b/surfsense_web/app/dashboard/page.tsx
index 767ce5201..440f37123 100644
--- a/surfsense_web/app/dashboard/page.tsx
+++ b/surfsense_web/app/dashboard/page.tsx
@@ -1,7 +1,7 @@
"use client";
import { useAtomValue } from "jotai";
-import { AlertCircle, Loader2, Plus, Search } from "lucide-react";
+import { AlertCircle, Plus, Search } from "lucide-react";
import { motion } from "motion/react";
import { useRouter } from "next/navigation";
import { useTranslations } from "next-intl";
@@ -18,37 +18,7 @@ import {
CardHeader,
CardTitle,
} from "@/components/ui/card";
-
-function LoadingScreen() {
- const t = useTranslations("dashboard");
- return (
-
-
-
-
- {t("loading")}
- {t("fetching_spaces")}
-
-
-
-
-
-
-
- {t("may_take_moment")}
-
-
-
-
- );
-}
+import { UnifiedLoadingScreen } from "@/components/ui/unified-loading-screen";
function ErrorScreen({ message }: { message: string }) {
const t = useTranslations("dashboard");
@@ -121,6 +91,7 @@ export default function DashboardPage() {
const router = useRouter();
const [showCreateDialog, setShowCreateDialog] = useState(false);
+ const t = useTranslations("dashboard");
const { data: searchSpaces = [], isLoading, error } = useAtomValue(searchSpacesAtom);
useEffect(() => {
@@ -131,11 +102,11 @@ export default function DashboardPage() {
}
}, [isLoading, searchSpaces, router]);
- if (isLoading) return
;
+ if (isLoading) return
;
if (error) return
;
if (searchSpaces.length > 0) {
- return
;
+ return
;
}
return (
diff --git a/surfsense_web/components/TokenHandler.tsx b/surfsense_web/components/TokenHandler.tsx
index b4ca36298..3f98451ef 100644
--- a/surfsense_web/components/TokenHandler.tsx
+++ b/surfsense_web/components/TokenHandler.tsx
@@ -1,7 +1,9 @@
"use client";
import { useSearchParams } from "next/navigation";
+import { useTranslations } from "next-intl";
import { useEffect } from "react";
+import { UnifiedLoadingScreen } from "@/components/ui/unified-loading-screen";
import { getAndClearRedirectPath, setBearerToken } from "@/lib/auth-utils";
import { trackLoginSuccess } from "@/lib/posthog/events";
@@ -25,6 +27,7 @@ const TokenHandler = ({
tokenParamName = "token",
storageKey = "surfsense_bearer_token",
}: TokenHandlerProps) => {
+ const t = useTranslations("auth");
const searchParams = useSearchParams();
useEffect(() => {
@@ -67,9 +70,7 @@ const TokenHandler = ({
}, [searchParams, tokenParamName, storageKey, redirectPath]);
return (
-
-
Processing authentication...
-
+
);
};
diff --git a/surfsense_web/components/new-chat/source-detail-panel.tsx b/surfsense_web/components/new-chat/source-detail-panel.tsx
index 08cff8380..1cac73a01 100644
--- a/surfsense_web/components/new-chat/source-detail-panel.tsx
+++ b/surfsense_web/components/new-chat/source-detail-panel.tsx
@@ -7,11 +7,11 @@ import {
ExternalLink,
FileText,
Hash,
- Loader2,
Sparkles,
X,
} from "lucide-react";
import { AnimatePresence, motion, useReducedMotion } from "motion/react";
+import { useTranslations } from "next-intl";
import type React from "react";
import { forwardRef, type ReactNode, useCallback, useEffect, useRef, useState } from "react";
import { createPortal } from "react-dom";
@@ -20,6 +20,7 @@ import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible";
import { ScrollArea } from "@/components/ui/scroll-area";
+import { Spinner } from "@/components/ui/spinner";
import type {
GetDocumentByChunkResponse,
GetSurfsenseDocsByChunkResponse,
@@ -63,7 +64,7 @@ interface ChunkCardProps {
}
const ChunkCard = forwardRef
(
- ({ chunk, index, totalChunks, isCited, isActive, disableLayoutAnimation }, ref) => {
+ ({ chunk, index, totalChunks, isCited }, ref) => {
return (
(null);
const hasScrolledRef = useRef(false); // Use ref to avoid stale closures
const [summaryOpen, setSummaryOpen] = useState(false);
const [activeChunkIndex, setActiveChunkIndex] = useState
(null);
const [mounted, setMounted] = useState(false);
- const [hasScrolledToCited, setHasScrolledToCited] = useState(false);
+ const [_hasScrolledToCited, setHasScrolledToCited] = useState(false);
const shouldReduceMotion = useReducedMotion();
useEffect(() => {
@@ -382,11 +384,8 @@ export function SourceDetailPanel({
animate={{ opacity: 1, scale: 1 }}
className="flex flex-col items-center gap-4"
>
-
- Loading document
+
+ {t("loading_document")}
)}
diff --git a/surfsense_web/components/providers/ElectricProvider.tsx b/surfsense_web/components/providers/ElectricProvider.tsx
index f187d10c1..68263b036 100644
--- a/surfsense_web/components/providers/ElectricProvider.tsx
+++ b/surfsense_web/components/providers/ElectricProvider.tsx
@@ -1,8 +1,10 @@
"use client";
import { useAtomValue } from "jotai";
+import { useTranslations } from "next-intl";
import { useEffect, useRef, useState } from "react";
import { currentUserAtom } from "@/atoms/user/user-query.atoms";
+import { UnifiedLoadingScreen } from "@/components/ui/unified-loading-screen";
import { getBearerToken } from "@/lib/auth-utils";
import {
cleanupElectric,
@@ -28,6 +30,7 @@ interface ElectricProviderProps {
* 5. Provides client via context - hooks should use useElectricClient()
*/
export function ElectricProvider({ children }: ElectricProviderProps) {
+ const t = useTranslations("common");
const [electricClient, setElectricClient] = useState(null);
const [error, setError] = useState(null);
const {
@@ -120,9 +123,7 @@ export function ElectricProvider({ children }: ElectricProviderProps) {
if (!electricClient && !error) {
return (
-
+
);
}
diff --git a/surfsense_web/components/ui/unified-loading-screen.tsx b/surfsense_web/components/ui/unified-loading-screen.tsx
new file mode 100644
index 000000000..7ed6b83d9
--- /dev/null
+++ b/surfsense_web/components/ui/unified-loading-screen.tsx
@@ -0,0 +1,72 @@
+"use client";
+
+import { useEffect, useState } from "react";
+import { createPortal } from "react-dom";
+import { Logo } from "@/components/Logo";
+import { Spinner } from "@/components/ui/spinner";
+import { AmbientBackground } from "@/app/(home)/login/AmbientBackground";
+
+interface UnifiedLoadingScreenProps {
+ /** Optional message to display below the spinner */
+ message?: string;
+ /** Visual style variant */
+ variant?: "login" | "default";
+}
+
+export function UnifiedLoadingScreen({
+ message,
+ variant = "default",
+}: UnifiedLoadingScreenProps) {
+ const [mounted, setMounted] = useState(false);
+
+ useEffect(() => {
+ setMounted(true);
+ }, []);
+
+ // Fixed-size container to prevent layout shifts
+ const spinnerContainer = (
+
+
+
+ );
+
+ const content = variant === "login" ? (
+
+
+
+
+
+ {spinnerContainer}
+ {message && (
+
+ {message}
+
+ )}
+
+
+
+ ) : (
+
+
+ {spinnerContainer}
+ {message && (
+
+ {message}
+
+ )}
+
+
+ );
+
+ // Render inline during SSR, use portal after mounting
+ // This prevents the black flash during initial render
+ if (!mounted) {
+ return content;
+ }
+
+ return createPortal(content, document.body);
+}
+
diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json
index 108d93262..59b9cf56a 100644
--- a/surfsense_web/messages/en.json
+++ b/surfsense_web/messages/en.json
@@ -2,7 +2,8 @@
"common": {
"app_name": "SurfSense",
"welcome": "Welcome",
- "loading": "Loading...",
+ "loading": "Loading",
+ "initializing": "Initializing",
"save": "Save",
"cancel": "Cancel",
"delete": "Delete",
@@ -76,9 +77,10 @@
"passwords_no_match": "Passwords do not match",
"password_mismatch": "Password Mismatch",
"passwords_no_match_desc": "The passwords you entered do not match",
- "creating_account": "Creating your account...",
- "creating_account_btn": "Creating account...",
- "redirecting_login": "Redirecting to login page..."
+ "creating_account": "Creating your account",
+ "creating_account_btn": "Creating account",
+ "redirecting_login": "Redirecting to login page",
+ "processing_authentication": "Processing authentication"
},
"searchSpace": {
"create_title": "Create Search Space",
@@ -143,12 +145,15 @@
"api_keys": "API Keys",
"profile": "Profile",
"loading_dashboard": "Loading Dashboard",
- "checking_auth": "Checking authentication...",
+ "checking_auth": "Checking authentication",
"loading_config": "Loading Configuration",
- "checking_llm_prefs": "Checking your LLM preferences...",
+ "checking_llm_prefs": "Checking your LLM preferences",
+ "setting_up_ai": "Setting up AI",
"config_error": "Configuration Error",
"failed_load_llm_config": "Failed to load your LLM configuration",
"error_loading_chats": "Error loading chats",
+ "loading_chat": "Loading chat",
+ "loading_document": "Loading document",
"no_recent_chats": "No recent chats",
"error_loading_space": "Error loading search space",
"unknown_search_space": "Unknown Search Space",
@@ -165,7 +170,7 @@
"create_search_space": "Create Search Space",
"add_new_search_space": "Add New Search Space",
"loading": "Loading",
- "fetching_spaces": "Fetching your search spaces...",
+ "fetching_spaces": "Fetching your search spaces",
"may_take_moment": "This may take a moment",
"error": "Error",
"something_wrong": "Something went wrong",
diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json
index 51e378bb2..3c4b6cf34 100644
--- a/surfsense_web/messages/zh.json
+++ b/surfsense_web/messages/zh.json
@@ -3,6 +3,7 @@
"app_name": "SurfSense",
"welcome": "欢迎",
"loading": "加载中...",
+ "initializing": "正在初始化",
"save": "保存",
"cancel": "取消",
"delete": "删除",
@@ -76,9 +77,10 @@
"passwords_no_match": "密码不匹配",
"password_mismatch": "密码不匹配",
"passwords_no_match_desc": "您输入的密码不一致",
- "creating_account": "正在创建您的账户...",
- "creating_account_btn": "创建中...",
- "redirecting_login": "正在跳转到登录页面..."
+ "creating_account": "正在创建您的账户",
+ "creating_account_btn": "创建中",
+ "redirecting_login": "正在跳转到登录页面",
+ "processing_authentication": "正在处理身份验证"
},
"searchSpace": {
"create_title": "创建搜索空间",
@@ -128,12 +130,15 @@
"api_keys": "API 密钥",
"profile": "个人资料",
"loading_dashboard": "正在加载仪表盘",
- "checking_auth": "正在检查身份验证...",
+ "checking_auth": "正在检查身份验证",
"loading_config": "正在加载配置",
- "checking_llm_prefs": "正在检查您的 LLM 偏好设置...",
+ "checking_llm_prefs": "正在检查您的 LLM 偏好设置",
+ "setting_up_ai": "正在设置 AI",
"config_error": "配置错误",
"failed_load_llm_config": "无法加载您的 LLM 配置",
"error_loading_chats": "加载对话失败",
+ "loading_chat": "正在加载对话",
+ "loading_document": "正在加载文档",
"no_recent_chats": "暂无最近对话",
"error_loading_space": "加载搜索空间失败",
"unknown_search_space": "未知搜索空间",
@@ -150,7 +155,7 @@
"create_search_space": "创建搜索空间",
"add_new_search_space": "添加新的搜索空间",
"loading": "加载中",
- "fetching_spaces": "正在获取您的搜索空间...",
+ "fetching_spaces": "正在获取您的搜索空间",
"may_take_moment": "这可能需要一些时间",
"error": "错误",
"something_wrong": "出现错误",
From ce9e3b01b7957bbd34bf0907688ad7af1b509826 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 19:53:56 +0530
Subject: [PATCH 29/51] feat: expand onboarding tour with inbox step and update
tooltip positioning
- Added a new onboarding tour step for the inbox, guiding users to view mentions and notifications.
- Updated tooltip positioning logic to accommodate the new inbox step, ensuring proper alignment during the tour.
- Enhanced the check for required elements to include the inbox step, improving the tour initiation process.
---
.../layout/ui/sidebar/NavSection.tsx | 4 +++-
surfsense_web/components/onboarding-tour.tsx | 18 +++++++++++++-----
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/surfsense_web/components/layout/ui/sidebar/NavSection.tsx b/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
index 742a27bbc..dc730bc4a 100644
--- a/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
@@ -20,7 +20,9 @@ export function NavSection({ items, onItemClick, isCollapsed = false }: NavSecti
const joyrideAttr =
item.title === "Documents" || item.title.toLowerCase().includes("documents")
? { "data-joyride": "documents-sidebar" }
- : {};
+ : item.title === "Inbox" || item.title.toLowerCase().includes("inbox")
+ ? { "data-joyride": "inbox-sidebar" }
+ : {};
if (isCollapsed) {
return (
diff --git a/surfsense_web/components/onboarding-tour.tsx b/surfsense_web/components/onboarding-tour.tsx
index 717a27607..12773c932 100644
--- a/surfsense_web/components/onboarding-tour.tsx
+++ b/surfsense_web/components/onboarding-tour.tsx
@@ -32,6 +32,12 @@ const TOUR_STEPS: TourStep[] = [
content: "Access and manage all your uploaded documents.",
placement: "right",
},
+ {
+ target: '[data-joyride="inbox-sidebar"]',
+ title: "Check your inbox",
+ content: "View mentions and notifications in one place.",
+ placement: "right",
+ },
];
interface TooltipPosition {
@@ -188,14 +194,15 @@ function TourTooltip({
const getPointerStyles = (): React.CSSProperties => {
const lineLength = 16;
const dotSize = 6;
- // Check if this is the documents step (stepIndex === 1)
+ // Check if this is the documents step (stepIndex === 1) or inbox step (stepIndex === 2)
const isDocumentsStep = stepIndex === 1;
+ const isInboxStep = stepIndex === 2;
if (position.pointerPosition === "left") {
return {
position: "absolute",
left: -lineLength - dotSize,
- top: isDocumentsStep ? "calc(50% - 8px)" : "50%",
+ top: isDocumentsStep || isInboxStep ? "calc(50% - 8px)" : "50%",
transform: "translateY(-50%)",
display: "flex",
alignItems: "center",
@@ -518,12 +525,13 @@ export function OnboardingTour() {
// User is new and hasn't seen tour - wait for DOM elements and start tour
const checkAndStartTour = () => {
- // Check if both required elements exist
+ // Check if all required elements exist
const connectorEl = document.querySelector(TOUR_STEPS[0].target);
const documentsEl = document.querySelector(TOUR_STEPS[1].target);
+ const inboxEl = document.querySelector(TOUR_STEPS[2].target);
- if (connectorEl && documentsEl) {
- // Both elements found, start tour
+ if (connectorEl && documentsEl && inboxEl) {
+ // All elements found, start tour
setIsActive(true);
setTargetEl(connectorEl);
setSpotlightTargetEl(connectorEl);
From eaecc091e331207ec2649dd3d913d9da8b302fe1 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 20:05:42 +0530
Subject: [PATCH 30/51] feat: add check icon to active items in sidebar
navigation and user profile
- Introduced a Check icon to visually indicate active items in the NavSection and SidebarUserProfile components.
- Updated styling to ensure the Check icon appears correctly without altering the background color for active items.
- Enhanced user experience by providing a clear visual cue for selected themes and languages in the sidebar.
---
.../layout/ui/sidebar/NavSection.tsx | 13 ++--
.../layout/ui/sidebar/SidebarUserProfile.tsx | 60 +++++++++++--------
2 files changed, 45 insertions(+), 28 deletions(-)
diff --git a/surfsense_web/components/layout/ui/sidebar/NavSection.tsx b/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
index dc730bc4a..d58f52612 100644
--- a/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
@@ -1,5 +1,6 @@
"use client";
+import { Check } from "lucide-react";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";
import type { NavItem } from "../../types/layout.types";
@@ -34,12 +35,14 @@ export function NavSection({ items, onItemClick, isCollapsed = false }: NavSecti
className={cn(
"relative flex h-10 w-10 items-center justify-center rounded-md transition-colors",
"hover:bg-accent hover:text-accent-foreground",
- "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring",
- item.isActive && "bg-accent text-accent-foreground"
+ "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
)}
{...joyrideAttr}
>
+ {item.isActive && (
+
+ )}
{item.badge && (
{item.badge}
@@ -64,13 +67,15 @@ export function NavSection({ items, onItemClick, isCollapsed = false }: NavSecti
className={cn(
"flex items-center gap-2 rounded-md mx-2 px-2 py-1.5 text-sm transition-colors text-left",
"hover:bg-accent hover:text-accent-foreground",
- "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring",
- item.isActive && "bg-accent text-accent-foreground"
+ "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
)}
{...joyrideAttr}
>
{item.title}
+ {item.isActive && (
+
+ )}
{item.badge && (
{item.badge}
diff --git a/surfsense_web/components/layout/ui/sidebar/SidebarUserProfile.tsx b/surfsense_web/components/layout/ui/sidebar/SidebarUserProfile.tsx
index 982d88e8b..66fe24565 100644
--- a/surfsense_web/components/layout/ui/sidebar/SidebarUserProfile.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/SidebarUserProfile.tsx
@@ -1,6 +1,6 @@
"use client";
-import { ChevronUp, Languages, Laptop, LogOut, Moon, Settings, Sun } from "lucide-react";
+import { Check, ChevronUp, Languages, Laptop, LogOut, Moon, Settings, Sun } from "lucide-react";
import { useTranslations } from "next-intl";
import {
DropdownMenu,
@@ -197,11 +197,14 @@ export function SidebarUserProfile({
className={cn(
"mb-1 last:mb-0 transition-all",
"hover:bg-accent/50",
- isSelected && "bg-accent/80"
+ isSelected && "text-primary"
)}
>
{t(themeOption.value)}
+ {isSelected && (
+
+ )}
);
})}
@@ -226,11 +229,14 @@ export function SidebarUserProfile({
className={cn(
"mb-1 last:mb-0 transition-all",
"hover:bg-accent/50",
- isSelected && "bg-accent/80"
+ isSelected && "text-primary"
)}
>
{language.flag}
{language.name}
+ {isSelected && (
+
+ )}
);
})}
@@ -302,26 +308,29 @@ export function SidebarUserProfile({
{t("theme")}
-
- {THEMES.map((themeOption) => {
- const Icon = themeOption.icon;
- const isSelected = theme === themeOption.value;
- return (
- handleThemeChange(themeOption.value)}
- className={cn(
- "mb-1 last:mb-0 transition-all",
- "hover:bg-accent/50",
- isSelected && "bg-accent/80"
- )}
- >
-
- {t(themeOption.value)}
-
- );
- })}
-
+
+ {THEMES.map((themeOption) => {
+ const Icon = themeOption.icon;
+ const isSelected = theme === themeOption.value;
+ return (
+ handleThemeChange(themeOption.value)}
+ className={cn(
+ "mb-1 last:mb-0 transition-all",
+ "hover:bg-accent/50",
+ isSelected && "text-primary"
+ )}
+ >
+
+ {t(themeOption.value)}
+ {isSelected && (
+
+ )}
+
+ );
+ })}
+
)}
@@ -342,11 +351,14 @@ export function SidebarUserProfile({
className={cn(
"mb-1 last:mb-0 transition-all",
"hover:bg-accent/50",
- isSelected && "bg-accent/80"
+ isSelected && "text-primary"
)}
>
{language.flag}
{language.name}
+ {isSelected && (
+
+ )}
);
})}
From de08f0644fb52a8c8bd9d5e09feea3c39fb9626d Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 20:51:46 +0530
Subject: [PATCH 31/51] feat: add ordering to search spaces retrieval for
consistent display
---
surfsense_backend/app/routes/search_spaces_routes.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py
index bc52a52b1..147f515b3 100644
--- a/surfsense_backend/app/routes/search_spaces_routes.py
+++ b/surfsense_backend/app/routes/search_spaces_routes.py
@@ -129,6 +129,7 @@ async def read_search_spaces(
result = await session.execute(
select(SearchSpace)
.filter(SearchSpace.user_id == user.id)
+ .order_by(SearchSpace.id.asc())
.offset(skip)
.limit(limit)
)
@@ -138,6 +139,7 @@ async def read_search_spaces(
select(SearchSpace)
.join(SearchSpaceMembership)
.filter(SearchSpaceMembership.user_id == user.id)
+ .order_by(SearchSpace.id.asc())
.offset(skip)
.limit(limit)
)
From 9215118bab94e21a6d025f5c71d2dad965128593 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 22:43:04 +0530
Subject: [PATCH 32/51] feat: enhance documentation and fix bug in
`loading.tsx`
---
surfsense_web/app/auth/callback/loading.tsx | 2 +
.../components/bookstack-connect-form.tsx | 124 +--------------
.../components/luma-connect-form.tsx | 131 ----------------
.../components/obsidian-connect-form.tsx | 145 ------------------
.../content/docs/connectors/airtable.mdx | 12 +-
.../content/docs/connectors/bookstack.mdx | 68 +++++++-
.../content/docs/connectors/circleback.mdx | 129 +++++++++++++++-
.../content/docs/connectors/clickup.mdx | 4 +-
.../content/docs/connectors/confluence.mdx | 20 ++-
.../content/docs/connectors/discord.mdx | 14 +-
.../content/docs/connectors/elasticsearch.mdx | 113 +++++++++++++-
.../content/docs/connectors/github.mdx | 40 ++---
.../content/docs/connectors/gmail.mdx | 4 +-
.../docs/connectors/google-calendar.mdx | 4 +-
.../content/docs/connectors/google-drive.mdx | 4 +-
.../content/docs/connectors/jira.mdx | 16 +-
.../content/docs/connectors/linear.mdx | 6 +-
.../content/docs/connectors/luma.mdx | 66 +++++++-
.../content/docs/connectors/meta.json | 3 +-
.../docs/connectors/microsoft-teams.mdx | 8 +-
.../content/docs/connectors/notion.mdx | 13 +-
.../content/docs/connectors/obsidian.mdx | 143 +++++++++++++++++
.../content/docs/connectors/slack.mdx | 8 +-
23 files changed, 614 insertions(+), 463 deletions(-)
create mode 100644 surfsense_web/content/docs/connectors/obsidian.mdx
diff --git a/surfsense_web/app/auth/callback/loading.tsx b/surfsense_web/app/auth/callback/loading.tsx
index 24d1891b4..676ca3632 100644
--- a/surfsense_web/app/auth/callback/loading.tsx
+++ b/surfsense_web/app/auth/callback/loading.tsx
@@ -1,3 +1,5 @@
+"use client";
+
import { useTranslations } from "next-intl";
import { Spinner } from "@/components/ui/spinner";
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/bookstack-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/bookstack-connect-form.tsx
index 2b7123d78..789e23787 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/bookstack-connect-form.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/bookstack-connect-form.tsx
@@ -6,12 +6,6 @@ import type { FC } from "react";
import { useRef, useState } from "react";
import { useForm } from "react-hook-form";
import * as z from "zod";
-import {
- Accordion,
- AccordionContent,
- AccordionItem,
- AccordionTrigger,
-} from "@/components/ui/accordion";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import {
Form,
@@ -85,6 +79,7 @@ export const BookStackConnectForm: FC = ({ onSubmit, isSubmitt
BOOKSTACK_TOKEN_SECRET: values.token_secret,
},
is_indexable: true,
+ is_active: true,
last_indexed_at: null,
periodic_indexing_enabled: periodicEnabled,
indexing_frequency_minutes: periodicEnabled ? parseInt(frequencyMinutes, 10) : null,
@@ -302,123 +297,6 @@ export const BookStackConnectForm: FC = ({ onSubmit, isSubmitt
)}
- {/* Documentation Section */}
-
-
-
- Documentation
-
-
-
-
How it works
-
- The BookStack connector uses the BookStack REST API to fetch all pages from your
- BookStack instance that your account has access to.
-
-
-
- For follow up indexing runs, the connector retrieves pages that have been updated
- since the last indexing attempt.
-
-
- Indexing is configured to run periodically, so updates should appear in your
- search results within minutes.
-
-
-
-
-
-
-
Authorization
-
-
- API Token Required
-
- You need to create an API token from your BookStack instance. The token requires
- "Access System API" permission.
-
-
-
-
-
-
- Step 1: Create an API Token
-
-
- Log in to your BookStack instance
- Click on your profile icon → Edit Profile
- Navigate to the "API Tokens" tab
- Click "Create Token" and give it a name
- Copy both the Token ID and Token Secret
- Paste them in the form above
-
-
-
-
-
- Step 2: Grant necessary access
-
-
- Your user account must have "Access System API" permission. The connector will
- only index content your account can view.
-
-
-
- Rate Limiting
-
- BookStack API has a rate limit of 180 requests per minute. The connector
- automatically handles rate limiting to ensure reliable indexing.
-
-
-
-
-
-
-
-
-
-
Indexing
-
-
- Navigate to the Connector Dashboard and select the BookStack {" "}
- Connector.
-
-
- Enter your BookStack Instance URL (e.g.,
- https://docs.example.com)
-
-
- Enter your Token ID and Token Secret from your
- BookStack API token.
-
-
- Click Connect to establish the connection.
-
- Once connected, your BookStack pages will be indexed automatically.
-
-
-
-
- What Gets Indexed
-
- The BookStack connector indexes the following data:
-
- All pages from your BookStack instance
- Page content in Markdown format
- Page titles and metadata
- Book and chapter hierarchy information
-
-
-
-
-
-
-
-
);
};
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/luma-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/luma-connect-form.tsx
index daa2e2615..2804dbba8 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/luma-connect-form.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/luma-connect-form.tsx
@@ -6,12 +6,6 @@ import type { FC } from "react";
import { useRef, useState } from "react";
import { useForm } from "react-hook-form";
import * as z from "zod";
-import {
- Accordion,
- AccordionContent,
- AccordionItem,
- AccordionTrigger,
-} from "@/components/ui/accordion";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import {
Form,
@@ -253,131 +247,6 @@ export const LumaConnectForm: FC = ({ onSubmit, isSubmitting }
)}
-
- {/* Documentation Section */}
-
-
-
- Documentation
-
-
-
-
How it works
-
- The Luma connector uses the Luma API to fetch all events that your API key has
- access to.
-
-
-
- For follow up indexing runs, the connector retrieves events that have been updated
- since the last indexing attempt.
-
-
- Indexing is configured to run periodically, so updates should appear in your
- search results within minutes.
-
-
-
-
-
-
-
Authorization
-
-
- API Key Required
-
- You need a Luma API key to use this connector. The key will be used to read your
- Luma events with read-only permissions.
-
-
-
-
-
-
- Step 1: Get Your API Key
-
-
- Log into your Luma account
- Navigate to your account settings
- Go to API settings or Developer settings
- Generate a new API key
- Copy the generated API key
-
- You can also visit{" "}
-
- Luma API Settings
- {" "}
- for more information.
-
-
-
-
-
-
- Step 2: Grant necessary access
-
-
- The API key will have access to all events that your user account can see.
- Make sure your account has appropriate permissions for the events you want to
- index.
-
-
-
- Data Privacy
-
- Only event details, descriptions, and attendee information will be indexed.
- Event attachments and linked files are not indexed by this connector.
-
-
-
-
-
-
-
-
-
-
Indexing
-
-
- Navigate to the Connector Dashboard and select the Luma {" "}
- Connector.
-
-
- Place your API Key in the form field.
-
-
- Click Connect to establish the connection.
-
- Once connected, your Luma events will be indexed automatically.
-
-
-
-
- What Gets Indexed
-
- The Luma connector indexes the following data:
-
- Event titles and descriptions
- Event details and metadata
- Attendee information
- Event dates and locations
-
-
-
-
-
-
-
-
);
};
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx
index 94839b03b..3c4b64090 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx
@@ -6,12 +6,6 @@ import type { FC } from "react";
import { useRef, useState } from "react";
import { useForm } from "react-hook-form";
import * as z from "zod";
-import {
- Accordion,
- AccordionContent,
- AccordionItem,
- AccordionTrigger,
-} from "@/components/ui/accordion";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import {
Form,
@@ -320,145 +314,6 @@ export const ObsidianConnectForm: FC = ({ onSubmit, isSubmitti
)}
-
- {/* Documentation Section */}
-
-
-
- Documentation
-
-
-
-
How it works
-
- The Obsidian connector scans your local Obsidian vault directory and indexes all
- Markdown files. It preserves your note structure and extracts metadata from YAML
- frontmatter.
-
-
-
- The connector parses frontmatter metadata (title, tags, aliases, dates, etc.)
-
- Wiki-style links ([[note]]) are extracted and preserved
- Inline tags (#tag) are recognized and indexed
- Content is chunked intelligently for optimal search results
-
- Subsequent indexing runs use content hashing to skip unchanged files for faster
- sync
-
-
-
-
-
-
-
Setup
-
-
-
- File System Access Required
-
-
- The SurfSense backend must have read access to your Obsidian vault directory.
- For Docker deployments, mount your vault as a volume.
-
-
-
-
-
-
- Step 1: Locate your vault
-
-
-
- macOS/Linux: Right-click any note in Obsidian → "Reveal in
- Finder" to see the vault folder
-
-
- Windows: Right-click any note → "Show in system explorer"
-
-
- Or: Click the vault switcher (bottom-left icon) → "Open
- folder" next to your vault name
-
-
-
-
-
-
- Step 2: Enter the path
-
-
- Running locally (no Docker): Use the direct path to your
- vault:
-
-
- {`/Users/yourname/Documents/MyObsidianVault`}
-
-
- Running in Docker: Mount your vault as a volume in
- docker-compose.yml:
-
-
- {`volumes:
- - /path/to/your/vault:/app/obsidian_vaults/my-vault:ro`}
-
-
- Then use /app/obsidian_vaults/my-vault as your vault path.
-
-
-
-
-
- Step 3: Configure exclusions
-
-
- Common folders to exclude:
-
-
-
- .obsidian - Obsidian config (always recommended)
-
-
- .trash - Obsidian's trash folder
-
-
- templates - If you have a templates folder
-
-
- daily-notes - If you want to exclude daily notes
-
-
-
-
-
-
-
-
-
-
What Gets Indexed
-
-
- Indexed Content
-
- The Obsidian connector indexes:
-
- All Markdown files (.md) in your vault
- YAML frontmatter metadata (title, tags, aliases, dates)
- Wiki-style links between notes
- Inline tags throughout your notes
- Full note content with proper chunking
-
-
-
-
-
-
-
-
);
};
diff --git a/surfsense_web/content/docs/connectors/airtable.mdx b/surfsense_web/content/docs/connectors/airtable.mdx
index 366a6e8e5..db7fe3ac0 100644
--- a/surfsense_web/content/docs/connectors/airtable.mdx
+++ b/surfsense_web/content/docs/connectors/airtable.mdx
@@ -36,20 +36,20 @@ After registration, configure the required scopes (permissions) for your integra
| Scope | Description |
|-------|-------------|
-| ✅ `data.recordComments:read` | See comments in records |
-| ✅ `data.records:read` | See the data in records |
+| `data.recordComments:read` | See comments in records |
+| `data.records:read` | See the data in records |
### Base schema
| Scope | Description |
|-------|-------------|
-| ✅ `schema.bases:read` | See the structure of a base, like table names or field types |
+| `schema.bases:read` | See the structure of a base, like table names or field types |
### User metadata
| Scope | Description |
|-------|-------------|
-| ✅ `user.email:read` | See the user's email address |
+| `user.email:read` | See the user's email address |

@@ -80,7 +80,9 @@ After saving, you'll find your OAuth credentials on the integration page:
1. Copy your **Client ID**
2. Copy your **Client Secret**
-> ⚠️ Never share your client secret publicly.
+
+ Never share your client secret publicly.
+
---
diff --git a/surfsense_web/content/docs/connectors/bookstack.mdx b/surfsense_web/content/docs/connectors/bookstack.mdx
index 8ee581948..f1f695e11 100644
--- a/surfsense_web/content/docs/connectors/bookstack.mdx
+++ b/surfsense_web/content/docs/connectors/bookstack.mdx
@@ -3,4 +3,70 @@ title: Bookstack
description: Connect your Bookstack instance to SurfSense
---
-# Documentation in progress
\ No newline at end of file
+# BookStack Integration Setup Guide
+
+This guide walks you through connecting your BookStack instance to SurfSense.
+
+## How it works
+
+The BookStack connector uses the BookStack REST API to fetch all pages from your BookStack instance that your account has access to.
+
+- For follow-up indexing runs, the connector retrieves pages that have been updated since the last indexing attempt.
+- Indexing should be configured to run periodically, so updates should appear in your search results within minutes.
+
+---
+
+## Authorization
+
+
+You need to create an API token from your BookStack instance. The token requires "Access System API" permission.
+
+
+### Step 1: Create an API Token
+
+1. Log in to your BookStack instance
+2. Click on your profile icon → Edit Profile
+3. Navigate to the "API Tokens" tab
+4. Click "Create Token" and give it a name
+5. Copy both the **Token ID** and **Token Secret**
+6. Paste them in the connector form
+
+### Step 2: Grant necessary access
+
+Your user account must have "Access System API" permission. The connector will only index content your account can view.
+
+
+The BookStack API has a rate limit of 180 requests per minute. The connector automatically handles rate limiting to ensure reliable indexing.
+
+
+
+Enable periodic sync to automatically re-index pages when content changes. Available frequencies: Every 5 minutes, 15 minutes, hourly, every 6 hours, daily, or weekly.
+
+
+---
+
+## Connecting to SurfSense
+
+1. Navigate to the Connector Dashboard and select the **BookStack** Connector.
+2. Fill in the required fields:
+
+| Field | Description | Example |
+|-------|-------------|---------|
+| **Connector Name** | A friendly name to identify this connector | `My BookStack Connector` |
+| **BookStack Base URL** | The full URL of your BookStack instance | `https://docs.example.com` |
+| **Token ID** | Your API Token ID from BookStack | |
+| **Token Secret** | Your API Token Secret from BookStack | |
+
+3. Click **Connect** to establish the connection.
+4. Once connected, your BookStack pages will be indexed automatically.
+
+### What Gets Indexed
+
+The BookStack connector indexes the following data:
+
+| Data Type | Description |
+|-----------|-------------|
+| Pages | All pages from your BookStack instance |
+| Content | Page content in Markdown format |
+| Metadata | Page titles and metadata |
+| Structure | Book and chapter hierarchy information |
diff --git a/surfsense_web/content/docs/connectors/circleback.mdx b/surfsense_web/content/docs/connectors/circleback.mdx
index a5c90a28f..709e35f45 100644
--- a/surfsense_web/content/docs/connectors/circleback.mdx
+++ b/surfsense_web/content/docs/connectors/circleback.mdx
@@ -1,8 +1,133 @@
---
title: Circleback
-description: Connect your circleback to SurfSense
+description: Connect your Circleback meetings to SurfSense
---
-# Documentation in progress
+# Circleback Integration Setup Guide
+This guide walks you through connecting your Circleback meetings to SurfSense for meeting search and AI-powered insights.
+## How it works
+
+The Circleback connector uses a **webhook-based integration**. Unlike other connectors that pull data from APIs, Circleback automatically pushes meeting data to SurfSense when meetings are processed.
+
+- Meetings are sent to SurfSense immediately after Circleback processes them
+- No API keys or credentials required
+- No periodic indexing needed - data arrives in real-time
+- Each meeting is stored with its notes, transcript, action items, and insights
+
+---
+
+## What Gets Indexed
+
+| Content Type | Description |
+|--------------|-------------|
+| Meeting Notes | AI-generated meeting notes in Markdown format |
+| Transcript | Full meeting transcript with speaker identification and timestamps |
+| Action Items | Tasks with assignees and status |
+| Attendees | Names and emails of meeting participants |
+| Insights | Custom insights extracted from the meeting |
+| Metadata | Date, duration, tags, meeting URL, and Circleback link |
+
+
+ Meeting recordings are not stored in SurfSense, but a link to the recording (valid for 24 hours from Circleback) is included in the indexed document.
+
+
+---
+
+## Setup Guide
+
+### Step 1: Create the Connector in SurfSense
+
+1. Navigate to **Connectors** → **Add Connector** → **Circleback**
+2. Enter a connector name (e.g., `My Circleback Meetings`)
+3. Click **Connect** to create the connector
+
+
+Circleback uses webhooks, so no API key or authentication is required. The webhook URL is unique to your search space.
+
+
+### Step 2: Copy Your Webhook URL
+
+After creating the connector:
+
+1. Open the connector settings
+2. Find the **Webhook URL** field
+3. Click **Copy** to copy the URL to your clipboard
+
+The webhook URL looks like:
+```
+https://your-surfsense-url/api/v1/webhooks/circleback/{search_space_id}
+```
+
+### Step 3: Configure Circleback Automation
+
+1. Log into your [Circleback account](https://app.circleback.ai)
+2. Go to **Automations** (in the main navigation)
+3. Click **Create automation**
+4. Add conditions to filter which meetings trigger the webhook (optional):
+ - Specific calendar(s)
+ - Meeting tags
+ - Meeting participants
+5. Select **Send webhook request** as the action
+6. Enter your SurfSense webhook URL
+7. Select the meeting outcomes to include (notes, action items, transcript, insights)
+8. Optionally test with your most recent meeting
+9. Name and save the automation
+
+For detailed instructions, see [Circleback's official webhook documentation](https://circleback.ai/docs/webhook-integration).
+
+
+ Make sure your SurfSense backend is accessible from the internet for Circleback to send webhooks. If self-hosting, you may need to configure a reverse proxy or use a tunneling service.
+
+
+---
+
+## Connector Configuration
+
+| Field | Description | Required |
+|-------|-------------|----------|
+| **Connector Name** | A friendly name to identify this connector | Yes |
+
+
+ Unlike other connectors, Circleback doesn't support periodic indexing since it's webhook-based. Meetings are automatically pushed to SurfSense when processed by Circleback.
+
+
+---
+
+## Verifying the Integration
+
+Once configured, new meetings will automatically appear in SurfSense after Circleback processes them. To verify:
+
+1. Attend or process a meeting with Circleback
+2. Wait for Circleback to complete processing (usually a few minutes after the meeting ends)
+3. Check your SurfSense search space for the new meeting document
+
+Each meeting document includes:
+- A direct link to view the meeting on Circleback
+- Full searchable transcript
+- Extracted action items with assignees
+- Meeting metadata and insights
+
+---
+
+## Troubleshooting
+
+**Meetings not appearing**
+- Verify the webhook URL is correctly configured in Circleback
+- Check that your SurfSense backend is accessible from the internet
+- Look for errors in your SurfSense backend logs
+
+**Webhook URL not showing**
+- Refresh the connector configuration page
+- Ensure the connector was created successfully
+- Check that your backend is running
+
+**Duplicate meetings**
+- Each meeting is uniquely identified by its Circleback meeting ID
+- If content changes, the existing document is updated rather than duplicated
+
+**Delayed indexing**
+- Meeting data is processed asynchronously
+- Documents should appear within a minute of receiving the webhook
+- Check the task queue if experiencing longer delays
diff --git a/surfsense_web/content/docs/connectors/clickup.mdx b/surfsense_web/content/docs/connectors/clickup.mdx
index 1b732c968..960b88370 100644
--- a/surfsense_web/content/docs/connectors/clickup.mdx
+++ b/surfsense_web/content/docs/connectors/clickup.mdx
@@ -36,7 +36,9 @@ After creating the app, you'll see your credentials:
1. Copy your **Client ID**
2. Copy your **Client Secret** (click "Show" to reveal, or "Regenerate" if needed)
-> ⚠️ Never share your client secret publicly.
+
+ Never share your client secret publicly.
+
---
diff --git a/surfsense_web/content/docs/connectors/confluence.mdx b/surfsense_web/content/docs/connectors/confluence.mdx
index fad9f3e3d..57116cf29 100644
--- a/surfsense_web/content/docs/connectors/confluence.mdx
+++ b/surfsense_web/content/docs/connectors/confluence.mdx
@@ -28,7 +28,9 @@ This guide walks you through setting up an Atlassian OAuth 2.0 (3LO) integration
2. Check the box to agree to Atlassian's developer terms
3. Click **"Create"**
-> ℹ️ New OAuth 2.0 integrations use rotating refresh tokens, which improve security by limiting token validity and enabling automatic detection of token reuse.
+
+ New OAuth 2.0 integrations use rotating refresh tokens, which improve security by limiting token validity and enabling automatic detection of token reuse.
+

@@ -41,7 +43,9 @@ This guide walks you through setting up an Atlassian OAuth 2.0 (3LO) integration
```
3. Click **"Save changes"**
-> ℹ️ You can enter up to 10 redirect URIs, one per line.
+
+ You can enter up to 10 redirect URIs, one per line.
+

@@ -62,7 +66,7 @@ Select the **"Classic scopes"** tab and enable:
| Scope Name | Code | Description |
|------------|------|-------------|
-| ✅ Read user | `read:confluence-user` | View user information in Confluence that you have access to, including usernames, email addresses and profile pictures |
+| Read user | `read:confluence-user` | View user information in Confluence that you have access to, including usernames, email addresses and profile pictures |

@@ -72,9 +76,9 @@ Select the **"Granular scopes"** tab and enable:
| Scope Name | Code | Description |
|------------|------|-------------|
-| ✅ View pages | `read:page:confluence` | View page content |
-| ✅ View comments | `read:comment:confluence` | View comments on pages or blogposts |
-| ✅ View spaces | `read:space:confluence` | View space details |
+| View pages | `read:page:confluence` | View page content |
+| View comments | `read:comment:confluence` | View comments on pages or blogposts |
+| View spaces | `read:space:confluence` | View space details |
4. Click **"Save"**
@@ -85,7 +89,9 @@ Select the **"Granular scopes"** tab and enable:
1. In the left sidebar, click **"Settings"**
2. Copy your **Client ID** and **Client Secret**
-> ⚠️ Never share your client secret publicly.
+
+ Never share your client secret publicly.
+
---
diff --git a/surfsense_web/content/docs/connectors/discord.mdx b/surfsense_web/content/docs/connectors/discord.mdx
index 6bb64e7e7..a90bcfe87 100644
--- a/surfsense_web/content/docs/connectors/discord.mdx
+++ b/surfsense_web/content/docs/connectors/discord.mdx
@@ -38,7 +38,9 @@ You'll also see your **Application ID** and **Public Key** on this page.
http://localhost:8000/api/v1/auth/discord/connector/callback
```
-> ⚠️ Keep **Public Client** disabled (off) since SurfSense uses a server to make requests.
+
+ Keep **Public Client** disabled (off) since SurfSense uses a server to make requests.
+

@@ -46,13 +48,15 @@ You'll also see your **Application ID** and **Public Key** on this page.
1. In the left sidebar, click **"Bot"**
2. Configure the **Authorization Flow**:
- - ✅ **Public Bot** - Enable to allow anyone to add the bot to servers
+ - **Public Bot** - Enable to allow anyone to add the bot to servers
3. Enable **Privileged Gateway Intents**:
- - ✅ **Server Members Intent** - Required to receive GUILD_MEMBERS events
- - ✅ **Message Content Intent** - Required to receive message content
+ - **Server Members Intent** - Required to receive GUILD_MEMBERS events
+ - **Message Content Intent** - Required to receive message content
-> ⚠️ Once your bot reaches 100+ servers, these intents will require verification and approval.
+
+ Once your bot reaches 100+ servers, these intents will require verification and approval.
+

diff --git a/surfsense_web/content/docs/connectors/elasticsearch.mdx b/surfsense_web/content/docs/connectors/elasticsearch.mdx
index ac43cca4e..f2615a9c6 100644
--- a/surfsense_web/content/docs/connectors/elasticsearch.mdx
+++ b/surfsense_web/content/docs/connectors/elasticsearch.mdx
@@ -3,4 +3,115 @@ title: Elasticsearch
description: Connect your Elasticsearch cluster to SurfSense
---
-# Documentation in progress
\ No newline at end of file
+# Elasticsearch Integration Setup Guide
+
+This guide walks you through connecting your Elasticsearch cluster to SurfSense.
+
+## How it works
+
+The Elasticsearch connector allows you to search and retrieve documents from your Elasticsearch cluster. Configure connection details, select specific indices, and set search parameters to make your existing data searchable within SurfSense.
+
+- For follow-up indexing runs, the connector retrieves documents that have been updated since the last indexing attempt.
+- Indexing should be configured to run periodically, so updates should appear in your search results within minutes.
+
+---
+
+## Authorization
+
+
+Elasticsearch requires authentication. You can use either an API key or username/password authentication.
+
+
+### Step 1: Get Your Elasticsearch Endpoint
+
+You'll need the endpoint URL for your Elasticsearch cluster. This typically looks like:
+
+- **Cloud:** `https://your-cluster.es.region.aws.com:443`
+- **Self-hosted:** `https://elasticsearch.example.com:9200`
+
+### Step 2: Configure Authentication
+
+Elasticsearch requires authentication. You can use either:
+
+**API Key:** A base64-encoded API key. You can create one in Elasticsearch by running:
+
+```bash
+POST /_security/api_key
+```
+
+**Username & Password:** Basic authentication using your Elasticsearch username and password.
+
+### Step 3: Select Indices
+
+Specify which indices to search. You can:
+
+- Use wildcards: `logs-*` to match multiple indices
+- List specific indices: `logs-2024, documents-2024`
+- Leave empty to search all accessible indices (not recommended for performance)
+
+
+Enable periodic sync to automatically re-index documents when content changes. Available frequencies: Every 5 minutes, 15 minutes, hourly, every 6 hours, daily, or weekly.
+
+
+---
+
+## Connecting to SurfSense
+
+1. Navigate to the Connector Dashboard and select the **Elasticsearch** Connector.
+2. Fill in the required fields:
+
+| Field | Description | Example |
+|-------|-------------|---------|
+| **Connector Name** | A friendly name to identify this connector | `My Elasticsearch Connector` |
+| **Elasticsearch Endpoint URL** | The full URL of your Elasticsearch cluster | `https://your-cluster.es.region.aws.com:443` |
+| **API Key** | Your base64-encoded API key (if using API key auth) | |
+| **Username** | Your Elasticsearch username (if using basic auth) | |
+| **Password** | Your Elasticsearch password (if using basic auth) | |
+| **Indices** | Comma-separated list of indices to search | `logs-*, documents-2024` |
+
+3. Click **Connect** to establish the connection.
+4. Once connected, your Elasticsearch documents will be indexed automatically.
+
+---
+
+## Advanced Configuration
+
+### Search Query
+
+The default query used for searches. Use `*` to match all documents, or specify a more complex Elasticsearch query.
+
+### Search Fields
+
+Limit searches to specific fields for better performance. Common fields include:
+
+- `title` - Document titles
+- `content` - Main content
+- `description` - Descriptions
+
+Leave empty to search all fields in your documents.
+
+### Maximum Documents
+
+Set a limit on the number of documents retrieved per search (1-10,000). This helps control response times and resource usage. Leave empty to use Elasticsearch's default limit.
+
+---
+
+## Troubleshooting
+
+### Connection Issues
+
+- **Invalid URL:** Ensure your endpoint URL includes the protocol (https://) and port number if required.
+- **SSL/TLS Errors:** Verify that your cluster uses HTTPS and the certificate is valid. Self-signed certificates may require additional configuration.
+- **Connection Timeout:** Check your network connectivity and firewall settings. Ensure the Elasticsearch cluster is accessible from SurfSense servers.
+
+### Authentication Issues
+
+- **Invalid Credentials:** Double-check your username/password or API key. API keys must be base64-encoded.
+- **Permission Denied:** Ensure your API key or user account has read permissions for the indices you want to search.
+- **API Key Format:** Elasticsearch API keys are typically base64-encoded strings. Make sure you're using the full key value.
+
+### Search Issues
+
+- **No Results:** Verify that your index selection matches existing indices. Use wildcards carefully.
+- **Slow Searches:** Limit the number of indices or use specific index names instead of wildcards. Reduce the maximum documents limit.
+- **Field Not Found:** Ensure the search fields you specify actually exist in your Elasticsearch documents.
diff --git a/surfsense_web/content/docs/connectors/github.mdx b/surfsense_web/content/docs/connectors/github.mdx
index 6a4574ec4..ce60d2a32 100644
--- a/surfsense_web/content/docs/connectors/github.mdx
+++ b/surfsense_web/content/docs/connectors/github.mdx
@@ -3,9 +3,18 @@ title: GitHub
description: Connect your GitHub repositories to SurfSense
---
-# GitHub Connector
+# GitHub Integration Setup Guide
-Connect your GitHub repositories to SurfSense for code search and AI-powered insights. The connector uses [gitingest](https://gitingest.com) to efficiently index entire codebases.
+This guide walks you through connecting your GitHub repositories to SurfSense for code search and AI-powered insights.
+
+## How it works
+
+The GitHub connector uses [gitingest](https://gitingest.com) to fetch and process repository contents from GitHub.
+
+- For follow-up indexing runs, the connector retrieves the latest repository state and updates changed files.
+- Indexing should be configured to run periodically, so updates should appear in your search results within minutes.
+
+---
## What Gets Indexed
@@ -15,7 +24,9 @@ Connect your GitHub repositories to SurfSense for code search and AI-powered ins
| Documentation | README files, Markdown documents, text files |
| Configuration | JSON, YAML, TOML, .env examples, Dockerfiles |
-> ⚠️ Binary files and files larger than 5MB are automatically excluded.
+
+ Binary files and files larger than 5MB are automatically excluded.
+
---
@@ -39,7 +50,13 @@ For private repos, you need a GitHub Personal Access Token (PAT).
2. Set an expiration
3. Click **Generate token** and copy it
-> ⚠️ The token starts with `ghp_`. Store it securely.
+
+ The token starts with `ghp_`. Store it securely.
+
+
+
+Enable periodic sync to automatically re-index repositories when content changes. Available frequencies: Every 5 minutes, 15 minutes, hourly, every 6 hours, daily, or weekly.
+
---
@@ -53,21 +70,6 @@ For private repos, you need a GitHub Personal Access Token (PAT).
---
-## Periodic Sync
-
-Enable periodic sync to automatically re-index repositories when content changes:
-
-| Frequency | Use Case |
-|-----------|----------|
-| Every 5 minutes | Active development |
-| Every 15 minutes | Frequent commits |
-| Every hour | Regular workflow |
-| Every 6 hours | Less active repos |
-| Daily | Reference repositories |
-| Weekly | Stable codebases |
-
----
-
## Troubleshooting
**Repository not found**
diff --git a/surfsense_web/content/docs/connectors/gmail.mdx b/surfsense_web/content/docs/connectors/gmail.mdx
index 434e6ae4d..2b514f89e 100644
--- a/surfsense_web/content/docs/connectors/gmail.mdx
+++ b/surfsense_web/content/docs/connectors/gmail.mdx
@@ -60,7 +60,9 @@ This guide walks you through setting up a Google OAuth 2.0 integration for SurfS
1. After creating the OAuth client, you'll see a dialog with your credentials
2. Copy your **Client ID** and **Client Secret**
-> ⚠️ Never share your client secret publicly.
+
+ Never share your client secret publicly.
+

diff --git a/surfsense_web/content/docs/connectors/google-calendar.mdx b/surfsense_web/content/docs/connectors/google-calendar.mdx
index cc1eae545..7919d0361 100644
--- a/surfsense_web/content/docs/connectors/google-calendar.mdx
+++ b/surfsense_web/content/docs/connectors/google-calendar.mdx
@@ -59,7 +59,9 @@ This guide walks you through setting up a Google OAuth 2.0 integration for SurfS
1. After creating the OAuth client, you'll see a dialog with your credentials
2. Copy your **Client ID** and **Client Secret**
-> ⚠️ Never share your client secret publicly.
+
+ Never share your client secret publicly.
+

diff --git a/surfsense_web/content/docs/connectors/google-drive.mdx b/surfsense_web/content/docs/connectors/google-drive.mdx
index 00ea2f610..402b25566 100644
--- a/surfsense_web/content/docs/connectors/google-drive.mdx
+++ b/surfsense_web/content/docs/connectors/google-drive.mdx
@@ -60,7 +60,9 @@ This guide walks you through setting up a Google OAuth 2.0 integration for SurfS
1. After creating the OAuth client, you'll see a dialog with your credentials
2. Copy your **Client ID** and **Client Secret**
-> ⚠️ Never share your client secret publicly.
+
+ Never share your client secret publicly.
+

diff --git a/surfsense_web/content/docs/connectors/jira.mdx b/surfsense_web/content/docs/connectors/jira.mdx
index ebe639d6d..c6b5a26e1 100644
--- a/surfsense_web/content/docs/connectors/jira.mdx
+++ b/surfsense_web/content/docs/connectors/jira.mdx
@@ -28,7 +28,9 @@ This guide walks you through setting up an Atlassian OAuth 2.0 (3LO) integration
2. Check the box to agree to Atlassian's developer terms
3. Click **"Create"**
-> ℹ️ New OAuth 2.0 integrations use rotating refresh tokens, which improve security by limiting token validity and enabling automatic detection of token reuse.
+
+ New OAuth 2.0 integrations use rotating refresh tokens, which improve security by limiting token validity and enabling automatic detection of token reuse.
+

@@ -41,7 +43,9 @@ This guide walks you through setting up an Atlassian OAuth 2.0 (3LO) integration
```
3. Click **"Save changes"**
-> ℹ️ You can enter up to 10 redirect URIs, one per line.
+
+ You can enter up to 10 redirect URIs, one per line.
+

@@ -60,8 +64,8 @@ This guide walks you through setting up an Atlassian OAuth 2.0 (3LO) integration
| Scope Name | Code | Description |
|------------|------|-------------|
-| ✅ View Jira issue data | `read:jira-work` | Read Jira project and issue data, search for issues, and objects associated with issues like attachments and worklogs |
-| ✅ View user profiles | `read:jira-user` | View user information in Jira that the user has access to, including usernames, email addresses, and avatars |
+| View Jira issue data | `read:jira-work` | Read Jira project and issue data, search for issues, and objects associated with issues like attachments and worklogs |
+| View user profiles | `read:jira-user` | View user information in Jira that the user has access to, including usernames, email addresses, and avatars |
4. Click **"Save"**
@@ -72,7 +76,9 @@ This guide walks you through setting up an Atlassian OAuth 2.0 (3LO) integration
1. In the left sidebar, click **"Settings"**
2. Copy your **Client ID** and **Client Secret**
-> ⚠️ Never share your client secret publicly.
+
+ Never share your client secret publicly.
+
---
diff --git a/surfsense_web/content/docs/connectors/linear.mdx b/surfsense_web/content/docs/connectors/linear.mdx
index f9dc9a62b..5fb7bc8c5 100644
--- a/surfsense_web/content/docs/connectors/linear.mdx
+++ b/surfsense_web/content/docs/connectors/linear.mdx
@@ -30,7 +30,7 @@ Fill in the application details:
### Settings
-- ✅ **Public** - Enable this to allow the application to be installed by other workspaces
+- **Public** - Enable this to allow the application to be installed by other workspaces
Click **Create** to create the application.
@@ -43,7 +43,9 @@ After creating the application, you'll see your OAuth credentials:
1. Copy your **Client ID**
2. Copy your **Client Secret**
-> ⚠️ Never share your client secret publicly.
+
+ Never share your client secret publicly.
+

diff --git a/surfsense_web/content/docs/connectors/luma.mdx b/surfsense_web/content/docs/connectors/luma.mdx
index e16e5a949..a218be46b 100644
--- a/surfsense_web/content/docs/connectors/luma.mdx
+++ b/surfsense_web/content/docs/connectors/luma.mdx
@@ -3,4 +3,68 @@ title: Luma
description: Connect your Luma events to SurfSense
---
-# Documentation in progress
\ No newline at end of file
+# Luma Integration Setup Guide
+
+This guide walks you through connecting your Luma events to SurfSense for event search and AI-powered insights.
+
+## How it works
+
+The Luma connector uses the Luma API to fetch all events that your API key has access to.
+
+- For follow-up indexing runs, the connector retrieves events that have been updated since the last indexing attempt.
+- Indexing should be configured to run periodically, so updates should appear in your search results within minutes.
+
+---
+
+## Authorization
+
+
+You need a Luma API key to use this connector. The key will be used to read your Luma events with read-only permissions.
+
+
+### Step 1: Get Your API Key
+
+1. Log into your Luma account
+2. Navigate to your account settings
+3. Go to API settings or Developer settings
+4. Generate a new API key
+5. Copy the generated API key
+
+You can also visit [Luma API Docs](https://docs.luma.com/reference/getting-started-with-your-api) for more information.
+
+### Step 2: Grant Necessary Access
+
+The API key will have access to all events that your user account can see. Make sure your account has appropriate permissions for the events you want to index.
+
+
+Enable periodic sync to automatically re-index events when content changes. Available frequencies: Every 5 minutes, 15 minutes, hourly, every 6 hours, daily, or weekly.
+
+
+---
+
+## Connecting to SurfSense
+
+1. Navigate to the Connector Dashboard and select the **Luma** Connector.
+2. Fill in the required fields:
+
+| Field | Description | Example |
+|-------|-------------|---------|
+| **Connector Name** | A friendly name to identify this connector | `My Luma Connector` |
+| **Luma API Key** | Your Luma API key (will be encrypted and stored securely) | |
+
+3. Click **Connect** to establish the connection.
+4. Once connected, your Luma events will be indexed automatically.
+
+### What Gets Indexed
+
+The Luma connector indexes the following data:
+
+| Data Type | Description |
+|-----------|-------------|
+| Event Details | Titles, descriptions, metadata |
+| Attendee Info | Attendee information and lists |
+| Event Metadata | Dates, locations, settings |
+
+
+ Event attachments and linked files are not indexed by this connector.
+
diff --git a/surfsense_web/content/docs/connectors/meta.json b/surfsense_web/content/docs/connectors/meta.json
index b41e92ab9..c6f7ab383 100644
--- a/surfsense_web/content/docs/connectors/meta.json
+++ b/surfsense_web/content/docs/connectors/meta.json
@@ -17,7 +17,8 @@
"luma",
"circleback",
"elasticsearch",
- "bookstack"
+ "bookstack",
+ "obsidian"
],
"defaultOpen": false
}
diff --git a/surfsense_web/content/docs/connectors/microsoft-teams.mdx b/surfsense_web/content/docs/connectors/microsoft-teams.mdx
index daa6eb375..53f36c249 100644
--- a/surfsense_web/content/docs/connectors/microsoft-teams.mdx
+++ b/surfsense_web/content/docs/connectors/microsoft-teams.mdx
@@ -58,7 +58,9 @@ After registration, you'll be taken to the app's **Overview** page. Here you'll

-> ⚠️ Never share your client secret publicly or include it in code repositories.
+
+ Never share your client secret publicly or include it in code repositories.
+
## Step 6: Configure API Permissions
@@ -78,7 +80,9 @@ After registration, you'll be taken to the app's **Overview** page. Here you'll
6. Click **"Add permissions"**
-> ⚠️ The `ChannelMessage.Read.All` permission requires admin consent. An admin will need to click **"Grant admin consent for [Directory]"** for full functionality.
+
+ The `ChannelMessage.Read.All` permission requires admin consent. An admin will need to click **"Grant admin consent for [Directory]"** for full functionality.
+

diff --git a/surfsense_web/content/docs/connectors/notion.mdx b/surfsense_web/content/docs/connectors/notion.mdx
index 936972f7e..0612c4f4f 100644
--- a/surfsense_web/content/docs/connectors/notion.mdx
+++ b/surfsense_web/content/docs/connectors/notion.mdx
@@ -52,14 +52,13 @@ After creating the integration, you'll see the configuration page with your cred
### Set Required Capabilities
-Under **Content Capabilities**, enable:
-- ✅ Read content
+Enable the following capabilities:
-Under **Comment Capabilities**, enable:
-- ✅ Read comments
-
-Under **User Capabilities**, select:
-- 🔘 Read user information including email addresses
+| Capability Type | Required Setting |
+|----------------|------------------|
+| **Content Capabilities** | Read content |
+| **Comment Capabilities** | Read comments |
+| **User Capabilities** | Read user information including email addresses |
Click **Save** to apply the capabilities.
diff --git a/surfsense_web/content/docs/connectors/obsidian.mdx b/surfsense_web/content/docs/connectors/obsidian.mdx
new file mode 100644
index 000000000..c8475c97f
--- /dev/null
+++ b/surfsense_web/content/docs/connectors/obsidian.mdx
@@ -0,0 +1,143 @@
+---
+title: Obsidian
+description: Connect your Obsidian vault to SurfSense
+---
+
+# Obsidian Integration Setup Guide
+
+This guide walks you through connecting your Obsidian vault to SurfSense for note search and AI-powered insights.
+
+
+ This connector requires direct file system access and only works with self-hosted SurfSense installations.
+
+
+## How it works
+
+The Obsidian connector scans your local Obsidian vault directory and indexes all Markdown files. It preserves your note structure and extracts metadata from YAML frontmatter.
+
+- For follow-up indexing runs, the connector uses content hashing to skip unchanged files for faster sync.
+- When indexing is configured to run periodically, updates appear in your search results within minutes.
+
+---
+
+## What Gets Indexed
+
+| Content Type | Description |
+|--------------|-------------|
+| Markdown Files | All `.md` files in your vault |
+| Frontmatter | YAML metadata (title, tags, aliases, dates) |
+| Wiki Links | Links between notes (`[[note]]`) |
+| Inline Tags | Tags throughout your notes (`#tag`) |
+| Note Content | Full content with intelligent chunking |
+
+
+ Binary files and attachments are not indexed by default. Enable "Include Attachments" to index embedded files.
+
+
+---
+
+## Quick Start (Local Installation)
+
+1. Navigate to **Connectors** → **Add Connector** → **Obsidian**
+2. Enter your vault path: `/Users/yourname/Documents/MyVault`
+3. Enter a vault name (e.g., `Personal Notes`)
+4. Click **Connect Obsidian**
+
+
+ Find your vault path: In Obsidian, right-click any note → "Reveal in Finder" (macOS) or "Show in Explorer" (Windows).
+
+
+
+Enable periodic sync to automatically re-index notes when content changes. Available frequencies: Every 5 minutes, 15 minutes, hourly, every 6 hours, daily, or weekly.
+
+
+---
+
+## Docker Setup
+
+For Docker deployments, you need to mount your Obsidian vault as a volume.
+
+### Step 1: Update docker-compose.yml
+
+Add your vault as a volume mount to the SurfSense backend service:
+
+```yaml
+services:
+ surfsense:
+ # ... other config
+ volumes:
+ - /path/to/your/obsidian/vault:/app/obsidian_vaults/my-vault:ro
+```
+
+
+ The `:ro` flag mounts the vault as read-only, which is recommended for security.
+
+
+### Step 2: Configure the Connector
+
+Use the **container path** (not your local path) when setting up the connector:
+
+| Your Local Path | Container Path (use this) |
+|-----------------|---------------------------|
+| `/Users/john/Documents/MyVault` | `/app/obsidian_vaults/my-vault` |
+| `C:\Users\john\Documents\MyVault` | `/app/obsidian_vaults/my-vault` |
+
+### Example: Multiple Vaults
+
+```yaml
+volumes:
+ - /Users/john/Documents/PersonalNotes:/app/obsidian_vaults/personal:ro
+ - /Users/john/Documents/WorkNotes:/app/obsidian_vaults/work:ro
+```
+
+Then create separate connectors for each vault using `/app/obsidian_vaults/personal` and `/app/obsidian_vaults/work`.
+
+---
+
+## Connector Configuration
+
+| Field | Description | Required |
+|-------|-------------|----------|
+| **Connector Name** | A friendly name to identify this connector | Yes |
+| **Vault Path** | Absolute path to your vault (container path for Docker) | Yes |
+| **Vault Name** | Display name for your vault in search results | Yes |
+| **Exclude Folders** | Comma-separated folder names to skip | No |
+| **Include Attachments** | Index embedded files (images, PDFs) | No |
+
+---
+
+## Recommended Exclusions
+
+Common folders to exclude from indexing:
+
+| Folder | Reason |
+|--------|--------|
+| `.obsidian` | Obsidian config files (always exclude) |
+| `.trash` | Obsidian's trash folder |
+| `templates` | Template files you don't want searchable |
+| `daily-notes` | If you want to exclude daily notes |
+| `attachments` | If not using "Include Attachments" |
+
+Default exclusions: `.obsidian,.trash`
+
+---
+
+## Troubleshooting
+
+**Vault not found / Permission denied**
+- Verify the path exists and is accessible
+- For Docker: ensure the volume is mounted correctly in `docker-compose.yml`
+- Check file permissions: SurfSense needs read access to the vault directory
+
+**No notes indexed**
+- Ensure your vault contains `.md` files
+- Check that notes aren't in excluded folders
+- Verify the path points to the vault root (contains `.obsidian` folder)
+
+**Changes not appearing**
+- Wait for the next sync cycle, or manually trigger re-indexing
+- For Docker: restart the container if you modified volume mounts
+
+**Docker: "path not found" error**
+- Use the container path (`/app/obsidian_vaults/...`), not your local path
+- Verify the volume mount in `docker-compose.yml` matches
diff --git a/surfsense_web/content/docs/connectors/slack.mdx b/surfsense_web/content/docs/connectors/slack.mdx
index ccabe6f9e..072b83343 100644
--- a/surfsense_web/content/docs/connectors/slack.mdx
+++ b/surfsense_web/content/docs/connectors/slack.mdx
@@ -21,7 +21,9 @@ This guide walks you through setting up a Slack OAuth integration for SurfSense.
2. Select the workspace to develop your app in
3. Click **"Create App"**
-> ⚠️ You won't be able to change the workspace later. The workspace will control the app even if you leave it.
+
+ You won't be able to change the workspace later. The workspace will control the app even if you leave it.
+

@@ -32,7 +34,9 @@ After creating the app, you'll be taken to the **Basic Information** page. Here
1. Copy your **Client ID**
2. Copy your **Client Secret** (click Show to reveal)
-> ⚠️ Never share your app credentials publicly.
+
+ Never share your app credentials publicly.
+

From 3368a65b0c5ab714e7673128bdb4782d31734b63 Mon Sep 17 00:00:00 2001
From: Rohan Verma <122026167+MODSetter@users.noreply.github.com>
Date: Sat, 24 Jan 2026 16:11:27 -0800
Subject: [PATCH 33/51] Change video link in README
Updated video link in README.
---
README.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 0c5f06029..4dd368c04 100644
--- a/README.md
+++ b/README.md
@@ -29,8 +29,7 @@ SurfSense is a highly customizable AI research agent, connected to external sour
# Video
-https://github.com/user-attachments/assets/42a29ea1-d4d8-4213-9c69-972b5b806d58
-
+https://github.com/user-attachments/assets/cc0c84d3-1f2f-4f7a-b519-2ecce22310b1
## Podcast Sample
From 20efc63f3003971a0db6c62c1c34cfdbf756cc3c Mon Sep 17 00:00:00 2001
From: "DESKTOP-RTLN3BA\\$punk"
Date: Sat, 24 Jan 2026 17:42:44 -0800
Subject: [PATCH 34/51] feat: implement dynamic connector and document type
discovery for knowledge base tool
- Added functionality to dynamically discover available connectors and document types for the knowledge base tool, enhancing its flexibility and usability.
- Introduced new mapping functions and updated existing search methods to accommodate Composio connectors, improving integration with external services.
- Enhanced error handling and logging for connector discovery processes, ensuring better feedback during failures.
---
.../app/agents/new_chat/chat_deepagent.py | 112 ++++++
.../app/agents/new_chat/tools/__init__.py | 4 +-
.../agents/new_chat/tools/knowledge_base.py | 306 +++++++++++----
.../app/agents/new_chat/tools/registry.py | 5 +
.../app/services/connector_service.py | 347 ++++++++++++++++++
5 files changed, 708 insertions(+), 66 deletions(-)
diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
index 5bc6ac2e2..53e1b14bd 100644
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@@ -7,6 +7,7 @@ via NewLLMConfig.
"""
from collections.abc import Sequence
+from typing import Any
from deepagents import create_deep_agent
from langchain_core.tools import BaseTool
@@ -23,6 +24,90 @@ from app.agents.new_chat.system_prompt import (
from app.agents.new_chat.tools.registry import build_tools_async
from app.services.connector_service import ConnectorService
+# =============================================================================
+# Connector Type Mapping
+# =============================================================================
+
+# Maps SearchSourceConnectorType enum values to the searchable document/connector types
+# used by the knowledge_base tool. Some connectors map to different document types.
+_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
+ # Direct mappings (connector type == searchable type)
+ "TAVILY_API": "TAVILY_API",
+ "SEARXNG_API": "SEARXNG_API",
+ "LINKUP_API": "LINKUP_API",
+ "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
+ "SLACK_CONNECTOR": "SLACK_CONNECTOR",
+ "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
+ "NOTION_CONNECTOR": "NOTION_CONNECTOR",
+ "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
+ "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
+ "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
+ "JIRA_CONNECTOR": "JIRA_CONNECTOR",
+ "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
+ "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
+ "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
+ "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
+ "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", # Connector type differs from document type
+ "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
+ "LUMA_CONNECTOR": "LUMA_CONNECTOR",
+ "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
+ "WEBCRAWLER_CONNECTOR": "CRAWLED_URL", # Maps to document type
+ "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
+ "CIRCLEBACK_CONNECTOR": "CIRCLEBACK", # Connector type differs from document type
+ "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
+ # Composio connectors
+ "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "COMPOSIO_GMAIL_CONNECTOR": "COMPOSIO_GMAIL_CONNECTOR",
+ "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+}
+
+# Document types that don't come from SearchSourceConnector but should always be searchable
+_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
+ "EXTENSION", # Browser extension data
+ "FILE", # Uploaded files
+ "NOTE", # User notes
+ "YOUTUBE_VIDEO", # YouTube videos
+]
+
+
+def _map_connectors_to_searchable_types(
+ connector_types: list[Any],
+) -> list[str]:
+ """
+ Map SearchSourceConnectorType enums to searchable document/connector types.
+
+ This function:
+ 1. Converts connector type enums to their searchable counterparts
+ 2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
+ 3. Deduplicates while preserving order
+
+ Args:
+ connector_types: List of SearchSourceConnectorType enum values
+
+ Returns:
+ List of searchable connector/document type strings
+ """
+ result_set: set[str] = set()
+ result_list: list[str] = []
+
+ # Add always-available document types first
+ for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES:
+ if doc_type not in result_set:
+ result_set.add(doc_type)
+ result_list.append(doc_type)
+
+ # Map each connector type to its searchable equivalent
+ for ct in connector_types:
+ # Handle both enum and string types
+ ct_str = ct.value if hasattr(ct, "value") else str(ct)
+ searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
+ if searchable and searchable not in result_set:
+ result_set.add(searchable)
+ result_list.append(searchable)
+
+ return result_list
+
+
# =============================================================================
# Deep Agent Factory
# =============================================================================
@@ -116,6 +201,30 @@ async def create_surfsense_deep_agent(
additional_tools=[my_custom_tool]
)
"""
+ # Discover available connectors and document types for this search space
+ # This enables dynamic tool docstrings that inform the LLM about what's actually available
+ available_connectors: list[str] | None = None
+ available_document_types: list[str] | None = None
+
+ try:
+ # Get enabled search source connectors for this search space
+ connector_types = await connector_service.get_available_connectors(
+ search_space_id
+ )
+ if connector_types:
+ # Convert enum values to strings and also include mapped document types
+ available_connectors = _map_connectors_to_searchable_types(connector_types)
+
+ # Get document types that have at least one document indexed
+ available_document_types = await connector_service.get_available_document_types(
+ search_space_id
+ )
+ except Exception as e:
+ # Log but don't fail - fall back to all connectors if discovery fails
+ import logging
+
+ logging.warning(f"Failed to discover available connectors/document types: {e}")
+
# Build dependencies dict for the tools registry
dependencies = {
"search_space_id": search_space_id,
@@ -123,6 +232,9 @@ async def create_surfsense_deep_agent(
"connector_service": connector_service,
"firecrawl_api_key": firecrawl_api_key,
"user_id": user_id, # Required for memory tools
+ # Dynamic connector/document type discovery for knowledge base tool
+ "available_connectors": available_connectors,
+ "available_document_types": available_document_types,
}
# Build tools using the async registry (includes MCP tools)
diff --git a/surfsense_backend/app/agents/new_chat/tools/__init__.py b/surfsense_backend/app/agents/new_chat/tools/__init__.py
index acbdbcb3a..9e1a4f19c 100644
--- a/surfsense_backend/app/agents/new_chat/tools/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/tools/__init__.py
@@ -19,6 +19,7 @@ Available tools:
# Tool factory exports (for direct use)
from .display_image import create_display_image_tool
from .knowledge_base import (
+ CONNECTOR_DESCRIPTIONS,
create_search_knowledge_base_tool,
format_documents_for_context,
search_knowledge_base_async,
@@ -40,6 +41,8 @@ from .user_memory import create_recall_memory_tool, create_save_memory_tool
__all__ = [
# Registry
"BUILTIN_TOOLS",
+ # Knowledge base utilities
+ "CONNECTOR_DESCRIPTIONS",
"ToolDefinition",
"build_tools",
# Tool factories
@@ -51,7 +54,6 @@ __all__ = [
"create_scrape_webpage_tool",
"create_search_knowledge_base_tool",
"create_search_surfsense_docs_tool",
- # Knowledge base utilities
"format_documents_for_context",
"get_all_tool_names",
"get_default_enabled_tools",
diff --git a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
index 552019dda..a11e4ac38 100644
--- a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
+++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
@@ -12,7 +12,8 @@ import json
from datetime import datetime
from typing import Any
-from langchain_core.tools import tool
+from langchain_core.tools import StructuredTool
+from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncSession
from app.services.connector_service import ConnectorService
@@ -22,6 +23,7 @@ from app.services.connector_service import ConnectorService
# =============================================================================
# Canonical connector values used internally by ConnectorService
+# Includes all document types and search source connectors
_ALL_CONNECTORS: list[str] = [
"EXTENSION",
"FILE",
@@ -50,41 +52,117 @@ _ALL_CONNECTORS: list[str] = [
"CRAWLED_URL",
"CIRCLEBACK",
"OBSIDIAN_CONNECTOR",
+ # Composio connectors
+ "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "COMPOSIO_GMAIL_CONNECTOR",
+ "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
]
+# Human-readable descriptions for each connector type
+# Used for generating dynamic docstrings and informing the LLM
+CONNECTOR_DESCRIPTIONS: dict[str, str] = {
+ "EXTENSION": "Web content saved via SurfSense browser extension (personal browsing history)",
+ "FILE": "User-uploaded documents (PDFs, Word, etc.) (personal files)",
+ "NOTE": "SurfSense Notes (notes created inside SurfSense)",
+ "SLACK_CONNECTOR": "Slack conversations and shared content (personal workspace communications)",
+ "TEAMS_CONNECTOR": "Microsoft Teams messages and conversations (personal Teams communications)",
+ "NOTION_CONNECTOR": "Notion workspace pages and databases (personal knowledge management)",
+ "YOUTUBE_VIDEO": "YouTube video transcripts and metadata (personally saved videos)",
+ "GITHUB_CONNECTOR": "GitHub repository content and issues (personal repositories and interactions)",
+ "ELASTICSEARCH_CONNECTOR": "Elasticsearch indexed documents and data (personal Elasticsearch instances)",
+ "LINEAR_CONNECTOR": "Linear project issues and discussions (personal project management)",
+ "JIRA_CONNECTOR": "Jira project issues, tickets, and comments (personal project tracking)",
+ "CONFLUENCE_CONNECTOR": "Confluence pages and comments (personal project documentation)",
+ "CLICKUP_CONNECTOR": "ClickUp tasks and project data (personal task management)",
+ "GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events, meetings, and schedules (personal calendar)",
+ "GOOGLE_GMAIL_CONNECTOR": "Google Gmail emails and conversations (personal emails)",
+ "GOOGLE_DRIVE_FILE": "Google Drive files and documents (personal cloud storage)",
+ "DISCORD_CONNECTOR": "Discord server conversations and shared content (personal community)",
+ "AIRTABLE_CONNECTOR": "Airtable records, tables, and database content (personal data)",
+ "TAVILY_API": "Tavily web search API results (real-time web search)",
+ "SEARXNG_API": "SearxNG search API results (privacy-focused web search)",
+ "LINKUP_API": "Linkup search API results (web search)",
+ "BAIDU_SEARCH_API": "Baidu search API results (Chinese web search)",
+ "LUMA_CONNECTOR": "Luma events and meetings",
+ "WEBCRAWLER_CONNECTOR": "Webpages indexed by SurfSense (personally selected websites)",
+ "CRAWLED_URL": "Webpages indexed by SurfSense (personally selected websites)",
+ "BOOKSTACK_CONNECTOR": "BookStack pages (personal documentation)",
+ "CIRCLEBACK": "Circleback meeting notes, transcripts, and action items",
+ "OBSIDIAN_CONNECTOR": "Obsidian vault notes and markdown files (personal notes)",
+ # Composio connectors
+ "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "Google Drive files via Composio (personal cloud storage)",
+ "COMPOSIO_GMAIL_CONNECTOR": "Gmail emails via Composio (personal emails)",
+ "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events via Composio (personal calendar)",
+}
-def _normalize_connectors(connectors_to_search: list[str] | None) -> list[str]:
+
+def _normalize_connectors(
+ connectors_to_search: list[str] | None,
+ available_connectors: list[str] | None = None,
+) -> list[str]:
"""
Normalize connectors provided by the model.
- Accepts user-facing enums like WEBCRAWLER_CONNECTOR and maps them to canonical
ConnectorService types.
- Drops unknown values.
- - If None/empty, defaults to searching across all known connectors.
+ - If available_connectors is provided, only includes connectors from that list.
+ - If connectors_to_search is None/empty, defaults to available_connectors or all.
+
+ Args:
+ connectors_to_search: List of connectors requested by the model
+ available_connectors: List of connectors actually available in the search space
+
+ Returns:
+ List of normalized connector strings to search
"""
+ # Determine the set of valid connectors to consider
+ valid_set = (
+ set(available_connectors) if available_connectors else set(_ALL_CONNECTORS)
+ )
+
if not connectors_to_search:
- return list(_ALL_CONNECTORS)
+ # Search all available connectors if none specified
+ return (
+ list(available_connectors)
+ if available_connectors
+ else list(_ALL_CONNECTORS)
+ )
normalized: list[str] = []
for raw in connectors_to_search:
c = (raw or "").strip().upper()
if not c:
continue
+ # Map user-facing aliases to canonical names
if c == "WEBCRAWLER_CONNECTOR":
c = "CRAWLED_URL"
normalized.append(c)
- # de-dupe while preserving order + filter unknown
+ # de-dupe while preserving order + filter to valid connectors
seen: set[str] = set()
out: list[str] = []
for c in normalized:
if c in seen:
continue
+ # Only include if it's a known connector AND available
if c not in _ALL_CONNECTORS:
continue
+ if c not in valid_set:
+ continue
seen.add(c)
out.append(c)
- return out if out else list(_ALL_CONNECTORS)
+
+ # Fallback to all available if nothing matched
+ return (
+ out
+ if out
+ else (
+ list(available_connectors)
+ if available_connectors
+ else list(_ALL_CONNECTORS)
+ )
+ )
# =============================================================================
@@ -233,6 +311,7 @@ async def search_knowledge_base_async(
top_k: int = 10,
start_date: datetime | None = None,
end_date: datetime | None = None,
+ available_connectors: list[str] | None = None,
) -> str:
"""
Search the user's knowledge base for relevant documents.
@@ -248,6 +327,8 @@ async def search_knowledge_base_async(
top_k: Number of results per connector
start_date: Optional start datetime (UTC) for filtering documents
end_date: Optional end datetime (UTC) for filtering documents
+ available_connectors: Optional list of connectors actually available in the search space.
+ If provided, only these connectors will be searched.
Returns:
Formatted string with search results
@@ -262,7 +343,7 @@ async def search_knowledge_base_async(
end_date=end_date,
)
- connectors = _normalize_connectors(connectors_to_search)
+ connectors = _normalize_connectors(connectors_to_search, available_connectors)
for connector in connectors:
try:
@@ -316,6 +397,16 @@ async def search_knowledge_base_async(
)
all_documents.extend(chunks)
+ elif connector == "TEAMS_CONNECTOR":
+ _, chunks = await connector_service.search_teams(
+ user_query=query,
+ search_space_id=search_space_id,
+ top_k=top_k,
+ start_date=resolved_start_date,
+ end_date=resolved_end_date,
+ )
+ all_documents.extend(chunks)
+
elif connector == "NOTION_CONNECTOR":
_, chunks = await connector_service.search_notion(
user_query=query,
@@ -519,6 +610,39 @@ async def search_knowledge_base_async(
)
all_documents.extend(chunks)
+ # =========================================================
+ # Composio Connectors
+ # =========================================================
+ elif connector == "COMPOSIO_GOOGLE_DRIVE_CONNECTOR":
+ _, chunks = await connector_service.search_composio_google_drive(
+ user_query=query,
+ search_space_id=search_space_id,
+ top_k=top_k,
+ start_date=resolved_start_date,
+ end_date=resolved_end_date,
+ )
+ all_documents.extend(chunks)
+
+ elif connector == "COMPOSIO_GMAIL_CONNECTOR":
+ _, chunks = await connector_service.search_composio_gmail(
+ user_query=query,
+ search_space_id=search_space_id,
+ top_k=top_k,
+ start_date=resolved_start_date,
+ end_date=resolved_end_date,
+ )
+ all_documents.extend(chunks)
+
+ elif connector == "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR":
+ _, chunks = await connector_service.search_composio_google_calendar(
+ user_query=query,
+ search_space_id=search_space_id,
+ top_k=top_k,
+ start_date=resolved_start_date,
+ end_date=resolved_end_date,
+ )
+ all_documents.extend(chunks)
+
except Exception as e:
print(f"Error searching connector {connector}: {e}")
continue
@@ -543,11 +667,68 @@ async def search_knowledge_base_async(
return format_documents_for_context(deduplicated)
+def _build_connector_docstring(available_connectors: list[str] | None) -> str:
+ """
+ Build the connector documentation section for the tool docstring.
+
+ Args:
+ available_connectors: List of available connector types, or None for all
+
+ Returns:
+ Formatted docstring section listing available connectors
+ """
+ connectors = available_connectors if available_connectors else list(_ALL_CONNECTORS)
+
+ lines = []
+ for connector in connectors:
+ # Skip internal names, prefer user-facing aliases
+ if connector == "CRAWLED_URL":
+ # Show as WEBCRAWLER_CONNECTOR for user-facing docs
+ description = CONNECTOR_DESCRIPTIONS.get(connector, connector)
+ lines.append(f"- WEBCRAWLER_CONNECTOR: {description}")
+ else:
+ description = CONNECTOR_DESCRIPTIONS.get(connector, connector)
+ lines.append(f"- {connector}: {description}")
+
+ return "\n".join(lines)
+
+
+# =============================================================================
+# Tool Input Schema
+# =============================================================================
+
+
+class SearchKnowledgeBaseInput(BaseModel):
+ """Input schema for the search_knowledge_base tool."""
+
+ query: str = Field(
+ description="The search query - be specific and include key terms"
+ )
+ top_k: int = Field(
+ default=10,
+ description="Number of results to retrieve (default: 10)",
+ )
+ start_date: str | None = Field(
+ default=None,
+ description="Optional ISO date/datetime (e.g. '2025-12-12' or '2025-12-12T00:00:00+00:00')",
+ )
+ end_date: str | None = Field(
+ default=None,
+ description="Optional ISO date/datetime (e.g. '2025-12-19' or '2025-12-19T23:59:59+00:00')",
+ )
+ connectors_to_search: list[str] | None = Field(
+ default=None,
+ description="Optional list of connector enums to search. If omitted, searches all available.",
+ )
+
+
def create_search_knowledge_base_tool(
search_space_id: int,
db_session: AsyncSession,
connector_service: ConnectorService,
-):
+ available_connectors: list[str] | None = None,
+ available_document_types: list[str] | None = None,
+) -> StructuredTool:
"""
Factory function to create the search_knowledge_base tool with injected dependencies.
@@ -555,72 +736,57 @@ def create_search_knowledge_base_tool(
search_space_id: The user's search space ID
db_session: Database session
connector_service: Initialized connector service
+ available_connectors: Optional list of connector types available in the search space.
+ Used to dynamically generate the tool docstring.
+ available_document_types: Optional list of document types that have data in the search space.
+ Used to inform the LLM about what data exists.
Returns:
- A configured tool function
+ A configured StructuredTool instance
"""
+ # Build connector documentation dynamically
+ connector_docs = _build_connector_docstring(available_connectors)
- @tool
- async def search_knowledge_base(
+ # Build context about available document types
+ doc_types_info = ""
+ if available_document_types:
+ doc_types_info = f"""
+
+## Document types with indexed content in this search space
+
+The following document types have content available for search:
+{", ".join(available_document_types)}
+
+Focus searches on these types for best results."""
+
+ # Build the dynamic description for the tool
+ # This is what the LLM sees when deciding whether/how to use the tool
+ dynamic_description = f"""Search the user's personal knowledge base for relevant information.
+
+Use this tool to find documents, notes, files, web pages, and other content that may help answer the user's question.
+
+IMPORTANT:
+- If the user requests a specific source type (e.g. "my notes", "Slack messages"), pass `connectors_to_search=[...]` using the enums below.
+- If `connectors_to_search` is omitted/empty, the system will search broadly.
+- Only connectors that are enabled/configured for this search space are available.{doc_types_info}
+
+## Available connector enums for `connectors_to_search`
+
+{connector_docs}
+
+NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`."""
+
+ # Capture for closure
+ _available_connectors = available_connectors
+
+ async def _search_knowledge_base_impl(
query: str,
top_k: int = 10,
start_date: str | None = None,
end_date: str | None = None,
connectors_to_search: list[str] | None = None,
) -> str:
- """
- Search the user's personal knowledge base for relevant information.
-
- Use this tool to find documents, notes, files, web pages, and other content
- that may help answer the user's question.
-
- IMPORTANT:
- - If the user requests a specific source type (e.g. "my notes", "Slack messages"),
- pass `connectors_to_search=[...]` using the enums below.
- - If `connectors_to_search` is omitted/empty, the system will search broadly.
-
- ## Available connector enums for `connectors_to_search`
-
- - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history)
- - FILE: "User-uploaded documents (PDFs, Word, etc.)" (personal files)
- - NOTE: "SurfSense Notes" (notes created inside SurfSense)
- - SLACK_CONNECTOR: "Slack conversations and shared content" (personal workspace communications)
- - TEAMS_CONNECTOR: "Microsoft Teams messages and conversations" (personal Teams communications)
- - NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management)
- - YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos)
- - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
- - ELASTICSEARCH_CONNECTOR: "Elasticsearch indexed documents and data" (personal Elasticsearch instances and custom data sources)
- - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
- - JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking)
- - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation)
- - CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management)
- - GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management)
- - GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications)
- - GOOGLE_DRIVE_FILE: "Google Drive files and documents" (personal cloud storage and file management)
- - DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications)
- - AIRTABLE_CONNECTOR: "Airtable records, tables, and database content" (personal data management and organization)
- - TAVILY_API: "Tavily search API results" (personalized search results)
- - SEARXNG_API: "SearxNG search API results" (personalized search results)
- - LINKUP_API: "Linkup search API results" (personalized search results)
- - BAIDU_SEARCH_API: "Baidu search API results" (personalized search results)
- - LUMA_CONNECTOR: "Luma events"
- - WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites)
- - BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation)
- - CIRCLEBACK: "Circleback meeting notes, transcripts, and action items" (personal meeting records)
- - OBSIDIAN_CONNECTOR: "Obsidian vault notes and markdown files" (personal notes and knowledge management)
-
- NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`.
-
- Args:
- query: The search query - be specific and include key terms
- top_k: Number of results to retrieve (default: 10)
- start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00")
- end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00")
- connectors_to_search: Optional list of connector enums to search. If omitted, searches all.
-
- Returns:
- Formatted string with relevant documents and their content
- """
+ """Implementation function for knowledge base search."""
from app.agents.new_chat.utils import parse_date_or_datetime
parsed_start: datetime | None = None
@@ -640,6 +806,16 @@ def create_search_knowledge_base_tool(
top_k=top_k,
start_date=parsed_start,
end_date=parsed_end,
+ available_connectors=_available_connectors,
)
- return search_knowledge_base
+ # Create StructuredTool with dynamic description
+ # This properly sets the description that the LLM sees
+ tool = StructuredTool(
+ name="search_knowledge_base",
+ description=dynamic_description,
+ coroutine=_search_knowledge_base_impl,
+ args_schema=SearchKnowledgeBaseInput,
+ )
+
+ return tool
diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py
index e4ce7a6b7..968e51445 100644
--- a/surfsense_backend/app/agents/new_chat/tools/registry.py
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@@ -85,6 +85,7 @@ class ToolDefinition:
# Contributors: Add your new tools here!
BUILTIN_TOOLS: list[ToolDefinition] = [
# Core tool - searches the user's knowledge base
+ # Now supports dynamic connector/document type discovery
ToolDefinition(
name="search_knowledge_base",
description="Search the user's personal knowledge base for relevant information",
@@ -92,8 +93,12 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
search_space_id=deps["search_space_id"],
db_session=deps["db_session"],
connector_service=deps["connector_service"],
+ # Optional: dynamically discovered connectors/document types
+ available_connectors=deps.get("available_connectors"),
+ available_document_types=deps.get("available_document_types"),
),
requires=["search_space_id", "db_session", "connector_service"],
+ # Note: available_connectors and available_document_types are optional
),
# Podcast generation tool
ToolDefinition(
diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py
index dc43697e7..4c5599815 100644
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@@ -2871,3 +2871,350 @@ class ConnectorService:
}
return result_object, obsidian_docs
+
+ # =========================================================================
+ # Composio Connector Search Methods
+ # =========================================================================
+
+ async def search_composio_google_drive(
+ self,
+ user_query: str,
+ search_space_id: int,
+ top_k: int = 20,
+ start_date: datetime | None = None,
+ end_date: datetime | None = None,
+ ) -> tuple:
+ """
+ Search for Composio Google Drive files and return both the source information
+ and langchain documents.
+
+ Uses combined chunk-level and document-level hybrid search with RRF fusion.
+
+ Args:
+ user_query: The user's query
+ search_space_id: The search space ID to search in
+ top_k: Maximum number of results to return
+ start_date: Optional start date for filtering documents by updated_at
+ end_date: Optional end date for filtering documents by updated_at
+
+ Returns:
+ tuple: (sources_info, langchain_documents)
+ """
+ composio_drive_docs = await self._combined_rrf_search(
+ query_text=user_query,
+ search_space_id=search_space_id,
+ document_type="COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ top_k=top_k,
+ start_date=start_date,
+ end_date=end_date,
+ )
+
+ # Early return if no results
+ if not composio_drive_docs:
+ return {
+ "id": 54,
+ "name": "Google Drive (Composio)",
+ "type": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "sources": [],
+ }, []
+
+ def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+ return (
+ doc_info.get("title")
+ or metadata.get("title")
+ or metadata.get("file_name")
+ or "Untitled Document"
+ )
+
+ def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+ return metadata.get("url") or metadata.get("web_view_link") or ""
+
+ def _description_fn(
+ chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+ ) -> str:
+ description = self._chunk_preview(chunk.get("content", ""), limit=200)
+ info_parts = []
+ mime_type = metadata.get("mime_type")
+ modified_time = metadata.get("modified_time")
+ if mime_type:
+ info_parts.append(f"Type: {mime_type}")
+ if modified_time:
+ info_parts.append(f"Modified: {modified_time}")
+ if info_parts:
+ description = (description + " | " + " | ".join(info_parts)).strip(" |")
+ return description
+
+ def _extra_fields_fn(
+ _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+ ) -> dict[str, Any]:
+ return {
+ "mime_type": metadata.get("mime_type", ""),
+ "file_id": metadata.get("file_id", ""),
+ "modified_time": metadata.get("modified_time", ""),
+ }
+
+ sources_list = self._build_chunk_sources_from_documents(
+ composio_drive_docs,
+ title_fn=_title_fn,
+ url_fn=_url_fn,
+ description_fn=_description_fn,
+ extra_fields_fn=_extra_fields_fn,
+ )
+
+ # Create result object
+ result_object = {
+ "id": 54,
+ "name": "Google Drive (Composio)",
+ "type": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+ "sources": sources_list,
+ }
+
+ return result_object, composio_drive_docs
+
+ async def search_composio_gmail(
+ self,
+ user_query: str,
+ search_space_id: int,
+ top_k: int = 20,
+ start_date: datetime | None = None,
+ end_date: datetime | None = None,
+ ) -> tuple:
+ """
+ Search for Composio Gmail messages and return both the source information
+ and langchain documents.
+
+ Uses combined chunk-level and document-level hybrid search with RRF fusion.
+
+ Args:
+ user_query: The user's query
+ search_space_id: The search space ID to search in
+ top_k: Maximum number of results to return
+ start_date: Optional start date for filtering documents by updated_at
+ end_date: Optional end date for filtering documents by updated_at
+
+ Returns:
+ tuple: (sources_info, langchain_documents)
+ """
+ composio_gmail_docs = await self._combined_rrf_search(
+ query_text=user_query,
+ search_space_id=search_space_id,
+ document_type="COMPOSIO_GMAIL_CONNECTOR",
+ top_k=top_k,
+ start_date=start_date,
+ end_date=end_date,
+ )
+
+ # Early return if no results
+ if not composio_gmail_docs:
+ return {
+ "id": 55,
+ "name": "Gmail (Composio)",
+ "type": "COMPOSIO_GMAIL_CONNECTOR",
+ "sources": [],
+ }, []
+
+ def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+ return (
+ doc_info.get("title")
+ or metadata.get("subject")
+ or metadata.get("title")
+ or "Untitled Email"
+ )
+
+ def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+ return metadata.get("url") or ""
+
+ def _description_fn(
+ chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+ ) -> str:
+ description = self._chunk_preview(chunk.get("content", ""), limit=200)
+ info_parts = []
+ sender = metadata.get("from") or metadata.get("sender")
+ date = metadata.get("date") or metadata.get("received_at")
+ if sender:
+ info_parts.append(f"From: {sender}")
+ if date:
+ info_parts.append(f"Date: {date}")
+ if info_parts:
+ description = (description + " | " + " | ".join(info_parts)).strip(" |")
+ return description
+
+ def _extra_fields_fn(
+ _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+ ) -> dict[str, Any]:
+ return {
+ "message_id": metadata.get("message_id", ""),
+ "thread_id": metadata.get("thread_id", ""),
+ "from": metadata.get("from", ""),
+ "to": metadata.get("to", ""),
+ "date": metadata.get("date", ""),
+ }
+
+ sources_list = self._build_chunk_sources_from_documents(
+ composio_gmail_docs,
+ title_fn=_title_fn,
+ url_fn=_url_fn,
+ description_fn=_description_fn,
+ extra_fields_fn=_extra_fields_fn,
+ )
+
+ # Create result object
+ result_object = {
+ "id": 55,
+ "name": "Gmail (Composio)",
+ "type": "COMPOSIO_GMAIL_CONNECTOR",
+ "sources": sources_list,
+ }
+
+ return result_object, composio_gmail_docs
+
+ async def search_composio_google_calendar(
+ self,
+ user_query: str,
+ search_space_id: int,
+ top_k: int = 20,
+ start_date: datetime | None = None,
+ end_date: datetime | None = None,
+ ) -> tuple:
+ """
+ Search for Composio Google Calendar events and return both the source information
+ and langchain documents.
+
+ Uses combined chunk-level and document-level hybrid search with RRF fusion.
+
+ Args:
+ user_query: The user's query
+ search_space_id: The search space ID to search in
+ top_k: Maximum number of results to return
+ start_date: Optional start date for filtering documents by updated_at
+ end_date: Optional end date for filtering documents by updated_at
+
+ Returns:
+ tuple: (sources_info, langchain_documents)
+ """
+ composio_calendar_docs = await self._combined_rrf_search(
+ query_text=user_query,
+ search_space_id=search_space_id,
+ document_type="COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+ top_k=top_k,
+ start_date=start_date,
+ end_date=end_date,
+ )
+
+ # Early return if no results
+ if not composio_calendar_docs:
+ return {
+ "id": 56,
+ "name": "Google Calendar (Composio)",
+ "type": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+ "sources": [],
+ }, []
+
+ def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+ return (
+ doc_info.get("title")
+ or metadata.get("summary")
+ or metadata.get("title")
+ or "Untitled Event"
+ )
+
+ def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+ return metadata.get("url") or metadata.get("html_link") or ""
+
+ def _description_fn(
+ chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+ ) -> str:
+ description = self._chunk_preview(chunk.get("content", ""), limit=200)
+ info_parts = []
+ start_time = metadata.get("start_time") or metadata.get("start")
+ end_time = metadata.get("end_time") or metadata.get("end")
+ if start_time:
+ info_parts.append(f"Start: {start_time}")
+ if end_time:
+ info_parts.append(f"End: {end_time}")
+ if info_parts:
+ description = (description + " | " + " | ".join(info_parts)).strip(" |")
+ return description
+
+ def _extra_fields_fn(
+ _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+ ) -> dict[str, Any]:
+ return {
+ "event_id": metadata.get("event_id", ""),
+ "calendar_id": metadata.get("calendar_id", ""),
+ "start_time": metadata.get("start_time", ""),
+ "end_time": metadata.get("end_time", ""),
+ "location": metadata.get("location", ""),
+ }
+
+ sources_list = self._build_chunk_sources_from_documents(
+ composio_calendar_docs,
+ title_fn=_title_fn,
+ url_fn=_url_fn,
+ description_fn=_description_fn,
+ extra_fields_fn=_extra_fields_fn,
+ )
+
+ # Create result object
+ result_object = {
+ "id": 56,
+ "name": "Google Calendar (Composio)",
+ "type": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+ "sources": sources_list,
+ }
+
+ return result_object, composio_calendar_docs
+
+ # =========================================================================
+ # Utility Methods for Connector Discovery
+ # =========================================================================
+
+ async def get_available_connectors(
+ self,
+ search_space_id: int,
+ ) -> list[SearchSourceConnectorType]:
+ """
+ Get all available (enabled) connector types for a search space.
+
+ Args:
+ search_space_id: The search space ID
+
+ Returns:
+ List of SearchSourceConnectorType enums for enabled connectors
+ """
+ query = (
+ select(SearchSourceConnector.connector_type)
+ .filter(
+ SearchSourceConnector.search_space_id == search_space_id,
+ )
+ .distinct()
+ )
+
+ result = await self.session.execute(query)
+ connector_types = result.scalars().all()
+ return list(connector_types)
+
+ async def get_available_document_types(
+ self,
+ search_space_id: int,
+ ) -> list[str]:
+ """
+ Get all document types that have at least one document in the search space.
+
+ Args:
+ search_space_id: The search space ID
+
+ Returns:
+ List of document type strings that have documents indexed
+ """
+ from sqlalchemy import distinct
+
+ from app.db import Document
+
+ query = select(distinct(Document.document_type)).filter(
+ Document.search_space_id == search_space_id,
+ )
+
+ result = await self.session.execute(query)
+ doc_types = result.scalars().all()
+ return [str(dt) for dt in doc_types]
From 555df90c842c52ac50a708a38f424287e1fc88b5 Mon Sep 17 00:00:00 2001
From: "DESKTOP-RTLN3BA\\$punk"
Date: Sat, 24 Jan 2026 17:47:18 -0800
Subject: [PATCH 35/51] chore: New connector statuses for Composio and GitHub
---
.../config/connector-status-config.json | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json b/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json
index b729c3f8b..2c1010b1c 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json
+++ b/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json
@@ -24,6 +24,16 @@
"enabled": true,
"status": "warning",
"statusMessage": "Some requests may be blocked if not using Firecrawl."
+ },
+ "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": {
+ "enabled": false,
+ "status": "disabled",
+ "statusMessage": "Not available yet."
+ },
+ "GITHUB_CONNECTOR": {
+ "enabled": false,
+ "status": "warning",
+ "statusMessage": "Some issues with indexing repositories."
}
},
"globalSettings": {
From 09162ad5cad4d627aa070f881830f9ca95b9d2ee Mon Sep 17 00:00:00 2001
From: "DESKTOP-RTLN3BA\\$punk"
Date: Sat, 24 Jan 2026 17:53:57 -0800
Subject: [PATCH 36/51] release: 0.0.12
---
surfsense_backend/pyproject.toml | 2 +-
surfsense_backend/uv.lock | 2 +-
surfsense_browser_extension/package.json | 2 +-
surfsense_web/package.json | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml
index ffe9e5232..57dbdc7b5 100644
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "surf-new-backend"
-version = "0.0.11"
+version = "0.0.12"
description = "SurfSense Backend"
requires-python = ">=3.12"
dependencies = [
diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock
index 18f04288e..16b77a7b2 100644
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
@@ -6545,7 +6545,7 @@ wheels = [
[[package]]
name = "surf-new-backend"
-version = "0.0.11"
+version = "0.0.12"
source = { editable = "." }
dependencies = [
{ name = "alembic" },
diff --git a/surfsense_browser_extension/package.json b/surfsense_browser_extension/package.json
index b225bc206..bf926d09f 100644
--- a/surfsense_browser_extension/package.json
+++ b/surfsense_browser_extension/package.json
@@ -1,7 +1,7 @@
{
"name": "surfsense_browser_extension",
"displayName": "Surfsense Browser Extension",
- "version": "0.0.11",
+ "version": "0.0.12",
"description": "Extension to collect Browsing History for SurfSense.",
"author": "https://github.com/MODSetter",
"engines": {
diff --git a/surfsense_web/package.json b/surfsense_web/package.json
index 7ec05c95d..235f4b9db 100644
--- a/surfsense_web/package.json
+++ b/surfsense_web/package.json
@@ -1,6 +1,6 @@
{
"name": "surfsense_web",
- "version": "0.0.11",
+ "version": "0.0.12",
"private": true,
"description": "SurfSense Frontend",
"scripts": {
From db7e865c036f69d81f5ad74151d4223a2b64abdc Mon Sep 17 00:00:00 2001
From: "DESKTOP-RTLN3BA\\$punk"
Date: Sun, 25 Jan 2026 00:07:00 -0800
Subject: [PATCH 37/51] fix: correct citation references for mentioned documents in chat
- Updated `format_mentioned_documents_as_context` to include detailed document metadata, including URL and JSON representation of metadata.
- Improved citation handling by using chunk IDs for mentioned documents.
- Adjusted the fetching of mentioned documents to load associated chunks for better citation accuracy.
- Cleaned up the context formatting for better readability and structure.
---
.../app/tasks/chat/stream_new_chat.py | 65 ++++++++++++++++---
1 file changed, 55 insertions(+), 10 deletions(-)
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index a49c244eb..8dfff4895 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -54,21 +54,64 @@ def format_attachments_as_context(attachments: list[ChatAttachment]) -> str:
def format_mentioned_documents_as_context(documents: list[Document]) -> str:
- """Format mentioned documents as context for the agent."""
+ """
+ Format mentioned documents as context for the agent.
+
+ Uses the same XML structure as knowledge_base.format_documents_for_context
+ to ensure citations work properly with chunk IDs.
+ """
if not documents:
return ""
context_parts = [""]
context_parts.append(
"The user has explicitly mentioned the following documents from their knowledge base. "
- "These documents are directly relevant to the query and should be prioritized as primary sources."
+ "These documents are directly relevant to the query and should be prioritized as primary sources. "
+ "Use [citation:CHUNK_ID] format for citations (e.g., [citation:123])."
)
- for i, doc in enumerate(documents, 1):
- context_parts.append(
- f""
+ context_parts.append("")
+
+ for doc in documents:
+ # Build metadata JSON
+ metadata = doc.document_metadata or {}
+ metadata_json = json.dumps(metadata, ensure_ascii=False)
+
+ # Get URL from metadata
+ url = (
+ metadata.get("url")
+ or metadata.get("source")
+ or metadata.get("page_url")
+ or ""
)
- context_parts.append(f"")
+
+ context_parts.append("")
+ context_parts.append("")
+ context_parts.append(f" {doc.id} ")
+ context_parts.append(f" {doc.document_type.value} ")
+ context_parts.append(f" ")
+ context_parts.append(f" ")
+ context_parts.append(f" ")
+ context_parts.append(" ")
+ context_parts.append("")
+ context_parts.append("")
+
+ # Use chunks if available (preferred for proper citations)
+ if hasattr(doc, "chunks") and doc.chunks:
+ for chunk in doc.chunks:
+ context_parts.append(
+ f" "
+ )
+ else:
+ # Fallback to document content if chunks not loaded
+ # Use document ID as chunk ID prefix for consistency
+ context_parts.append(
+ f" "
+ )
+
+ context_parts.append(" ")
context_parts.append(" ")
+ context_parts.append("")
+
context_parts.append(" ")
return "\n".join(context_parts)
@@ -81,8 +124,6 @@ def format_mentioned_surfsense_docs_as_context(
if not documents:
return ""
- import json
-
context_parts = [""]
context_parts.append(
"The user has explicitly mentioned the following SurfSense documentation pages. "
@@ -262,11 +303,15 @@ async def stream_new_chat(
# Build input with message history from frontend
langchain_messages = []
- # Fetch mentioned documents if any
+ # Fetch mentioned documents if any (with chunks for proper citations)
mentioned_documents: list[Document] = []
if mentioned_document_ids:
+ from sqlalchemy.orm import selectinload as doc_selectinload
+
result = await session.execute(
- select(Document).filter(
+ select(Document)
+ .options(doc_selectinload(Document.chunks))
+ .filter(
Document.id.in_(mentioned_document_ids),
Document.search_space_id == search_space_id,
)
From 2d17d1a1b6684e83d591601ab677fa3254ae976a Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sun, 25 Jan 2026 15:23:45 +0530
Subject: [PATCH 38/51] feat: replace Loader2 with Spinner component for
consistent loading indicators
---
.../(manage)/components/DocumentsTableShell.tsx | 5 +++--
.../editor/[documentId]/page.tsx | 11 +++++------
.../dashboard/[search_space_id]/onboard/page.tsx | 4 ++--
.../dashboard/[search_space_id]/team/page.tsx | 14 +++++++-------
.../user/settings/components/ProfileContent.tsx | 7 ++++---
surfsense_web/app/invite/[invite_code]/page.tsx | 7 +++----
.../components/assistant-ui/attachment.tsx | 7 ++++---
.../assistant-ui/chat-session-status.tsx | 4 ++--
.../components/assistant-ui/connector-popup.tsx | 5 +++--
.../components/connector-card.tsx | 7 ++++---
.../views/connector-connect-view.tsx | 5 +++--
.../views/connector-edit-view.tsx | 7 ++++---
.../views/indexing-configuration-view.tsx | 5 +++--
.../tabs/active-connectors-tab.tsx | 7 ++++---
.../views/connector-accounts-list-view.tsx | 7 ++++---
.../views/youtube-crawler-view.tsx | 5 +++--
.../comment-item/comment-actions.tsx | 6 ++++--
.../member-mention-picker.tsx | 4 ++--
.../connectors/composio-drive-folder-tree.tsx | 6 +++---
.../connectors/google-drive-folder-tree.tsx | 6 +++---
.../ui/dialogs/CreateSearchSpaceDialog.tsx | 5 +++--
.../layout/ui/sidebar/AllPrivateChatsSidebar.tsx | 6 +++---
.../layout/ui/sidebar/AllSharedChatsSidebar.tsx | 6 +++---
.../components/layout/ui/sidebar/NavSection.tsx | 7 -------
.../components/new-chat/model-selector.tsx | 4 ++--
.../components/settings/llm-role-manager.tsx | 4 ++--
.../components/settings/model-config-manager.tsx | 6 +++---
.../components/shared/llm-config-form.tsx | 4 ++--
.../components/sources/DocumentUploadTab.tsx | 5 +++--
.../components/tool-ui/generate-podcast.tsx | 9 +++++----
surfsense_web/components/tool-ui/image/index.tsx | 5 +++--
.../components/tool-ui/media-card/index.tsx | 16 ++++++++--------
surfsense_web/components/tool-ui/write-todos.tsx | 4 ++--
surfsense_web/messages/en.json | 8 ++++----
34 files changed, 113 insertions(+), 105 deletions(-)
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index 38d61a6ce..6d28f9166 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -1,6 +1,6 @@
"use client";
-import { ChevronDown, ChevronUp, FileX, Loader2, Plus } from "lucide-react";
+import { ChevronDown, ChevronUp, FileX, Plus } from "lucide-react";
import { motion } from "motion/react";
import { useParams } from "next/navigation";
import { useTranslations } from "next-intl";
@@ -8,6 +8,7 @@ import React from "react";
import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
import { DocumentViewer } from "@/components/document-viewer";
import { Button } from "@/components/ui/button";
+import { Spinner } from "@/components/ui/spinner";
import { Checkbox } from "@/components/ui/checkbox";
import {
Table,
@@ -114,7 +115,7 @@ export function DocumentsTableShell({
{loading ? (
diff --git a/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx
index 2320b3b9a..74104f450 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx
@@ -1,8 +1,7 @@
"use client";
-import { useQueryClient } from "@tanstack/react-query";
import { useAtom } from "jotai";
-import { AlertCircle, ArrowLeft, FileText, Loader2, Save } from "lucide-react";
+import { AlertCircle, ArrowLeft, FileText, Save } from "lucide-react";
import { motion } from "motion/react";
import { useParams, useRouter } from "next/navigation";
import { useEffect, useMemo, useState } from "react";
@@ -21,6 +20,7 @@ import {
} from "@/components/ui/alert-dialog";
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Spinner } from "@/components/ui/spinner";
import { notesApiService } from "@/lib/apis/notes-api.service";
import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils";
@@ -78,7 +78,6 @@ function extractTitleFromBlockNote(blocknoteDocument: BlockNoteDocument): string
export default function EditorPage() {
const params = useParams();
const router = useRouter();
- const queryClient = useQueryClient();
const documentId = params.documentId as string;
const searchSpaceId = Number(params.search_space_id);
const isNewNote = documentId === "new";
@@ -349,8 +348,8 @@ export default function EditorPage() {
-
- Loading editor...
+
+ Loading editor
@@ -437,7 +436,7 @@ export default function EditorPage() {
>
{saving ? (
<>
-
+
{isNewNote ? "Creating" : "Saving"}
>
) : (
diff --git a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx
index 25f189203..1b7fa297f 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx
@@ -1,7 +1,6 @@
"use client";
import { useAtomValue } from "jotai";
-import { Loader2 } from "lucide-react";
import { motion } from "motion/react";
import { useParams, useRouter } from "next/navigation";
import { useEffect, useRef, useState } from "react";
@@ -17,6 +16,7 @@ import {
import { Logo } from "@/components/Logo";
import { LLMConfigForm, type LLMConfigFormData } from "@/components/shared/llm-config-form";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Spinner } from "@/components/ui/spinner";
import { getBearerToken, redirectToLogin } from "@/lib/auth-utils";
export default function OnboardPage() {
@@ -156,7 +156,7 @@ export default function OnboardPage() {
diff --git a/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx
index b661e9222..c535da9f1 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx
@@ -14,7 +14,6 @@ import {
Hash,
Link2,
LinkIcon,
- Loader2,
Logs,
type LucideIcon,
MessageCircle,
@@ -106,6 +105,7 @@ import {
} from "@/components/ui/table";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { Textarea } from "@/components/ui/textarea";
+import { Spinner } from "@/components/ui/spinner";
import type {
CreateInviteRequest,
DeleteInviteRequest,
@@ -321,7 +321,7 @@ export default function TeamManagementPage() {
animate={{ opacity: 1, scale: 1 }}
className="flex flex-col items-center gap-4"
>
-
+
Loading team data...
@@ -571,7 +571,7 @@ function MembersTab({
if (loading) {
return (
-
+
);
}
@@ -911,7 +911,7 @@ function RolesTab({
if (loading) {
return (
-
+
);
}
@@ -1068,7 +1068,7 @@ function InvitesTab({
if (loading) {
return (
-
+
);
}
@@ -1446,7 +1446,7 @@ function CreateInviteDialog({
{creating ? (
<>
-
+
Creating
>
) : (
@@ -1699,7 +1699,7 @@ function CreateRoleDialog({
{creating ? (
<>
-
+
Creating
>
) : (
diff --git a/surfsense_web/app/dashboard/user/settings/components/ProfileContent.tsx b/surfsense_web/app/dashboard/user/settings/components/ProfileContent.tsx
index 511a09fd1..a1ff4d781 100644
--- a/surfsense_web/app/dashboard/user/settings/components/ProfileContent.tsx
+++ b/surfsense_web/app/dashboard/user/settings/components/ProfileContent.tsx
@@ -1,7 +1,7 @@
"use client";
import { useAtomValue } from "jotai";
-import { Loader2, Menu, User } from "lucide-react";
+import { Menu, User } from "lucide-react";
import { AnimatePresence, motion } from "motion/react";
import { useTranslations } from "next-intl";
import { useEffect, useState } from "react";
@@ -11,6 +11,7 @@ import { currentUserAtom } from "@/atoms/user/user-query.atoms";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
+import { Spinner } from "@/components/ui/spinner";
interface ProfileContentProps {
onMenuClick: () => void;
@@ -129,7 +130,7 @@ export function ProfileContent({ onMenuClick }: ProfileContentProps) {
>
{isUserLoading ? (
-
+
) : (