- This connection uses Composio's managed OAuth, which means you don't need to
- wait for app verification. Your data is securely accessed through Composio.
-
- Connect these services for future indexing support. Currently available for connection only.
-
-
- {nonIndexableToolkits.map((toolkit) => (
-
-
-
- {getToolkitIcon(toolkit.id, "size-5")}
-
-
- Soon
-
-
-
{toolkit.name}
-
- {toolkit.description}
-
-
-
- ))}
-
-
-
- {/* Info footer */}
-
-
-
-
-
-
-
Why use Composio?
-
- Composio provides pre-verified OAuth apps, so you don't need to wait for Google app verification.
- Your data is securely processed through Composio's managed authentication.
-
-
-
-
-
-
- );
-};
From 4cbf80d73a74170a532cf1b531d7a9d670cc4663 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 04:44:37 +0530
Subject: [PATCH 03/21] feat: enhance Composio integration with pagination and
improved error handling
- Updated the list_gmail_messages method to support pagination with page tokens, allowing for more efficient message retrieval.
- Modified the return structure to include next_page_token and result_size_estimate for better client-side handling.
- Improved error handling and logging throughout the Gmail indexing process, ensuring better visibility into failures.
- Implemented batch processing for Gmail messages, committing changes incrementally to prevent data loss.
- Ensured consistent timestamp updates for connectors, even when no documents are indexed, to maintain accurate UI states.
- Refactored the indexing logic to streamline message processing and enhance overall performance.
---
.../app/connectors/composio_connector.py | 15 +-
.../routes/search_source_connectors_routes.py | 16 +-
.../app/services/composio_service.py | 54 +-
.../app/tasks/composio_indexer.py | 579 ++++++++++++------
4 files changed, 451 insertions(+), 213 deletions(-)
diff --git a/surfsense_backend/app/connectors/composio_connector.py b/surfsense_backend/app/connectors/composio_connector.py
index 18fd9564c..21e339d12 100644
--- a/surfsense_backend/app/connectors/composio_connector.py
+++ b/surfsense_backend/app/connectors/composio_connector.py
@@ -151,21 +151,23 @@ class ComposioConnector:
async def list_gmail_messages(
self,
query: str = "",
- max_results: int = 100,
- ) -> tuple[list[dict[str, Any]], str | None]:
+ max_results: int = 50,
+ page_token: str | None = None,
+ ) -> tuple[list[dict[str, Any]], str | None, int | None, str | None]:
"""
- List Gmail messages via Composio.
+ List Gmail messages via Composio with pagination support.
Args:
query: Gmail search query.
- max_results: Maximum number of messages.
+ max_results: Maximum number of messages per page (default: 50).
+ page_token: Optional pagination token for next page.
Returns:
- Tuple of (messages list, error message).
+ Tuple of (messages list, next_page_token, result_size_estimate, error message).
"""
connected_account_id = await self.get_connected_account_id()
if not connected_account_id:
- return [], "No connected account ID found"
+ return [], None, None, "No connected account ID found"
entity_id = await self.get_entity_id()
service = await self._get_service()
@@ -174,6 +176,7 @@ class ComposioConnector:
entity_id=entity_id,
query=query,
max_results=max_results,
+ page_token=page_token,
)
async def get_gmail_message_detail(
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 9ad03fba8..1578ad0d5 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -957,7 +957,7 @@ async def _update_connector_timestamp_by_id(session: AsyncSession, connector_id:
connector = result.scalars().first()
if connector:
- connector.last_indexed_at = datetime.now()
+ connector.last_indexed_at = datetime.now(UTC) # Use UTC for timezone consistency
await session.commit()
logger.info(f"Updated last_indexed_at for connector {connector_id}")
except Exception as e:
@@ -1097,18 +1097,22 @@ async def _run_indexing_with_notifications(
)
await update_timestamp_func(session, connector_id)
+ await session.commit() # Commit timestamp update
logger.info(
f"Indexing completed successfully: {documents_processed} documents processed"
)
# Update notification on success
if notification:
+ # Refresh notification to ensure it's not stale after timestamp update commit
+ await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=documents_processed,
error_message=None,
)
+ await session.commit() # Commit to ensure Electric SQL syncs the notification update
elif documents_processed > 0:
# Update notification to storing stage
if notification:
@@ -1124,24 +1128,30 @@ async def _run_indexing_with_notifications(
f"Indexing completed successfully: {documents_processed} documents processed"
)
if notification:
+ # Refresh notification to ensure it's not stale after indexing function commits
+ await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=documents_processed,
error_message=None,
)
+ await session.commit() # Commit to ensure Electric SQL syncs the notification update
else:
# No new documents processed - check if this is an error or just no changes
if error_or_warning:
# Actual failure
logger.error(f"Indexing failed: {error_or_warning}")
if notification:
+ # Refresh notification to ensure it's not stale after indexing function commits
+ await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=0,
error_message=error_or_warning,
)
+ await session.commit() # Commit to ensure Electric SQL syncs the notification update
else:
# Success - just no new documents to index (all skipped/unchanged)
logger.info(
@@ -1150,13 +1160,17 @@ async def _run_indexing_with_notifications(
# Still update timestamp so ElectricSQL syncs and clears "Syncing" UI
if update_timestamp_func:
await update_timestamp_func(session, connector_id)
+ await session.commit() # Commit timestamp update
if notification:
+ # Refresh notification to ensure it's not stale after timestamp update commit
+ await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=0,
error_message=None, # No error - sync succeeded
)
+ await session.commit() # Commit to ensure Electric SQL syncs the notification update
except Exception as e:
logger.error(f"Error in indexing task: {e!s}", exc_info=True)
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index 17fbd64e0..e32cbf8a0 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -256,7 +256,6 @@ class ComposioService:
"user_id": getattr(acc, "user_id", None),
})
- logger.info(f"DEBUG: Found {len(result)} TOTAL connections in Composio")
return result
except Exception as e:
logger.error(f"Failed to list all connections: {e!s}")
@@ -273,7 +272,6 @@ class ComposioService:
List of connected account details.
"""
try:
- logger.info(f"DEBUG: Calling connected_accounts.list(user_id='{user_id}')")
accounts_response = self.client.connected_accounts.list(user_id=user_id)
# Handle paginated response (may have .items attribute) or direct list
@@ -358,7 +356,6 @@ class ComposioService:
# - connected_account_id: for authentication
# - user_id: user identifier (SDK uses user_id, not entity_id)
# - dangerously_skip_version_check: skip version check for manual execution
- logger.info(f"DEBUG: Executing tool {tool_name} with params: {params}")
result = self.client.tools.execute(
slug=tool_name,
connected_account_id=connected_account_id,
@@ -366,8 +363,6 @@ class ComposioService:
arguments=params or {},
dangerously_skip_version_check=True,
)
- logger.info(f"DEBUG: Tool {tool_name} raw result type: {type(result)}")
- logger.info(f"DEBUG: Tool {tool_name} raw result: {result}")
return {"success": True, "data": result}
except Exception as e:
logger.error(f"Failed to execute tool {tool_name}: {e!s}")
@@ -417,7 +412,6 @@ class ComposioService:
return [], None, result.get("error", "Unknown error")
data = result.get("data", {})
- logger.info(f"DEBUG: Drive data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}")
# Handle nested response structure from Composio
files = []
@@ -429,7 +423,6 @@ class ComposioService:
elif isinstance(data, list):
files = data
- logger.info(f"DEBUG: Extracted {len(files)} drive files")
return files, next_token, None
except Exception as e:
@@ -478,25 +471,30 @@ class ComposioService:
connected_account_id: str,
entity_id: str,
query: str = "",
- max_results: int = 100,
- ) -> tuple[list[dict[str, Any]], str | None]:
+ max_results: int = 50,
+ page_token: str | None = None,
+ ) -> tuple[list[dict[str, Any]], str | None, int | None, str | None]:
"""
- List Gmail messages via Composio.
+ List Gmail messages via Composio with pagination support.
Args:
connected_account_id: Composio connected account ID.
entity_id: The entity/user ID that owns the connected account.
query: Gmail search query.
- max_results: Maximum number of messages to return.
+ max_results: Maximum number of messages to return per page (default: 50 to avoid payload size issues).
+ page_token: Optional pagination token for next page.
Returns:
- Tuple of (messages list, error message).
+ Tuple of (messages list, next_page_token, result_size_estimate, error message).
"""
try:
- # Composio uses snake_case for parameters, max is 500
- params = {"max_results": min(max_results, 500)}
+ # Use smaller batch size to avoid 413 payload too large errors
+ # Composio uses snake_case for parameters
+ params = {"max_results": min(max_results, 50)} # Reduced from 500 to 50
if query:
params["query"] = query # Composio uses 'query' not 'q'
+ if page_token:
+ params["page_token"] = page_token
result = await self.execute_tool(
connected_account_id=connected_account_id,
@@ -506,25 +504,38 @@ class ComposioService:
)
if not result.get("success"):
- return [], result.get("error", "Unknown error")
+ return [], None, None, result.get("error", "Unknown error")  # 4-tuple: (messages, next_page_token, result_size_estimate, error)
data = result.get("data", {})
- logger.info(f"DEBUG: Gmail data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}")
- logger.info(f"DEBUG: Gmail full data: {data}")
# Try different possible response structures
messages = []
+ next_token = None
+ result_size_estimate = None
if isinstance(data, dict):
messages = data.get("messages", []) or data.get("data", {}).get("messages", []) or data.get("emails", [])
+ # Check for pagination token in various possible locations
+ next_token = (
+ data.get("nextPageToken")
+ or data.get("next_page_token")
+ or data.get("data", {}).get("nextPageToken")
+ or data.get("data", {}).get("next_page_token")
+ )
+ # Extract resultSizeEstimate if available (Gmail API provides this)
+ result_size_estimate = (
+ data.get("resultSizeEstimate")
+ or data.get("result_size_estimate")
+ or data.get("data", {}).get("resultSizeEstimate")
+ or data.get("data", {}).get("result_size_estimate")
+ )
elif isinstance(data, list):
messages = data
- logger.info(f"DEBUG: Extracted {len(messages)} messages")
- return messages, None
+ return messages, next_token, result_size_estimate, None
except Exception as e:
logger.error(f"Failed to list Gmail messages: {e!s}")
- return [], str(e)
+ return [], None, None, str(e)  # keep the 4-tuple shape so callers can unpack the error
async def get_gmail_message_detail(
self, connected_account_id: str, entity_id: str, message_id: str
@@ -603,8 +614,6 @@ class ComposioService:
return [], result.get("error", "Unknown error")
data = result.get("data", {})
- logger.info(f"DEBUG: Calendar data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}")
- logger.info(f"DEBUG: Calendar full data: {data}")
# Try different possible response structures
events = []
@@ -613,7 +622,6 @@ class ComposioService:
elif isinstance(data, list):
events = data
- logger.info(f"DEBUG: Extracted {len(events)} calendar events")
return events, None
except Exception as e:
diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py
index 8762561ee..c9cd74234 100644
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@@ -9,6 +9,7 @@ to avoid circular import issues with the connector_indexers package.
import logging
from datetime import UTC, datetime
+from typing import Any
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
@@ -26,6 +27,7 @@ from app.db import (
from app.services.composio_service import INDEXABLE_TOOLKITS, TOOLKIT_TO_DOCUMENT_TYPE
from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
+from app.tasks.connector_indexers.base import calculate_date_range
from app.utils.document_converters import (
create_document_chunks,
generate_content_hash,
@@ -75,7 +77,7 @@ async def update_connector_last_indexed(
) -> None:
"""Update the last_indexed_at timestamp for a connector."""
if update_last_indexed:
- connector.last_indexed_at = datetime.now()
+ connector.last_indexed_at = datetime.now(UTC) # Use UTC for timezone consistency
logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
@@ -287,6 +289,9 @@ async def _index_composio_google_drive(
await task_logger.log_task_success(
log_entry, success_msg, {"files_count": 0}
)
+ # CRITICAL: Update timestamp even when no files found so Electric SQL syncs and UI shows indexed status
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ await session.commit()
return 0, None # Return None (not error) when no items found - this is success with 0 items
logger.info(f"Found {len(all_files)} Google Drive files to index via Composio")
@@ -380,6 +385,13 @@ async def _index_composio_google_drive(
existing_document.updated_at = get_current_timestamp()
documents_indexed += 1
+
+ # Batch commit every 10 documents
+ if documents_indexed % 10 == 0:
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Drive files processed so far"
+ )
+ await session.commit()
continue
# Create new document
@@ -425,7 +437,11 @@ async def _index_composio_google_drive(
session.add(document)
documents_indexed += 1
+ # Batch commit every 10 documents
if documents_indexed % 10 == 0:
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Drive files processed so far"
+ )
await session.commit()
except Exception as e:
@@ -433,10 +449,19 @@ async def _index_composio_google_drive(
documents_skipped += 1
continue
- if documents_indexed > 0:
- await update_connector_last_indexed(session, connector, update_last_indexed)
+ # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+ # This ensures the UI shows "Last indexed" instead of "Never indexed"
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ # Final commit to ensure all documents are persisted (safety net)
+ # This matches the pattern used in non-Composio Gmail indexer
+ logger.info(
+ f"Final commit: Total {documents_indexed} Google Drive files processed"
+ )
await session.commit()
+ logger.info(
+ "Successfully committed all Composio Google Drive document changes to database"
+ )
await task_logger.log_task_success(
log_entry,
@@ -454,154 +479,89 @@ async def _index_composio_google_drive(
return 0, f"Failed to index Google Drive via Composio: {e!s}"
-async def _index_composio_gmail(
+async def _process_gmail_message_batch(
session: AsyncSession,
- connector,
+ messages: list[dict[str, Any]],
+ composio_connector: ComposioConnector,
connector_id: int,
search_space_id: int,
user_id: str,
- start_date: str | None,
- end_date: str | None,
- task_logger: TaskLoggingService,
- log_entry,
- update_last_indexed: bool = True,
- max_items: int = 1000,
-) -> tuple[int, str]:
- """Index Gmail messages via Composio."""
- try:
- composio_connector = ComposioConnector(session, connector_id)
+ total_documents_indexed: int = 0,
+) -> tuple[int, int]:
+ """
+ Process a batch of Gmail messages and index them.
+
+ Args:
+ total_documents_indexed: Running total of documents indexed so far (for batch commits).
+
+ Returns:
+ Tuple of (documents_indexed, documents_skipped)
+ """
+ documents_indexed = 0
+ documents_skipped = 0
- await task_logger.log_task_progress(
- log_entry,
- f"Fetching Gmail messages via Composio for connector {connector_id}",
- {"stage": "fetching_messages"},
- )
+ for message in messages:
+ try:
+ # Composio uses 'messageId' (camelCase), not 'id'
+ message_id = message.get("messageId", "") or message.get("id", "")
+ if not message_id:
+ documents_skipped += 1
+ continue
- # Build query with date range
- query_parts = []
- if start_date:
- query_parts.append(f"after:{start_date.replace('-', '/')}")
- if end_date:
- query_parts.append(f"before:{end_date.replace('-', '/')}")
- query = " ".join(query_parts)
+ # Composio's GMAIL_FETCH_EMAILS already returns full message content
+ # No need for a separate detail API call
- messages, error = await composio_connector.list_gmail_messages(
- query=query,
- max_results=max_items,
- )
+ # Extract message info from Composio response
+ # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
+ payload = message.get("payload", {})
+ headers = payload.get("headers", [])
- if error:
- await task_logger.log_task_failure(
- log_entry, f"Failed to fetch Gmail messages: {error}", {}
+ subject = "No Subject"
+ sender = "Unknown Sender"
+ date_str = message.get("messageTimestamp", "Unknown Date")
+
+ for header in headers:
+ name = header.get("name", "").lower()
+ value = header.get("value", "")
+ if name == "subject":
+ subject = value
+ elif name == "from":
+ sender = value
+ elif name == "date":
+ date_str = value
+
+ # Format to markdown using the full message data
+ markdown_content = composio_connector.format_gmail_message_to_markdown(message)
+
+ # Check for empty content (defensive parsing per Composio best practices)
+ if not markdown_content.strip():
+ logger.warning(f"Skipping Gmail message with no content: {subject}")
+ documents_skipped += 1
+ continue
+
+ # Generate unique identifier
+ document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"])
+ unique_identifier_hash = generate_unique_identifier_hash(
+ document_type, f"gmail_{message_id}", search_space_id
)
- return 0, f"Failed to fetch Gmail messages: {error}"
- if not messages:
- success_msg = "No Gmail messages found in the specified date range"
- await task_logger.log_task_success(
- log_entry, success_msg, {"messages_count": 0}
+ content_hash = generate_content_hash(markdown_content, search_space_id)
+
+ existing_document = await check_document_by_unique_identifier(
+ session, unique_identifier_hash
)
- return 0, None # Return None (not error) when no items found - this is success with 0 items
- logger.info(f"Found {len(messages)} Gmail messages to index via Composio")
+ # Get label IDs from Composio response
+ label_ids = message.get("labelIds", [])
+ # Extract thread_id if available (for consistency with non-Composio implementation)
+ thread_id = message.get("threadId", "") or message.get("thread_id", "")
- documents_indexed = 0
- documents_skipped = 0
-
- for message in messages:
- try:
- # Composio uses 'messageId' (camelCase), not 'id'
- message_id = message.get("messageId", "") or message.get("id", "")
- if not message_id:
+ if existing_document:
+ if existing_document.content_hash == content_hash:
documents_skipped += 1
continue
- # Composio's GMAIL_FETCH_EMAILS already returns full message content
- # No need for a separate detail API call
-
- # Extract message info from Composio response
- # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
- payload = message.get("payload", {})
- headers = payload.get("headers", [])
-
- subject = "No Subject"
- sender = "Unknown Sender"
- date_str = message.get("messageTimestamp", "Unknown Date")
-
- for header in headers:
- name = header.get("name", "").lower()
- value = header.get("value", "")
- if name == "subject":
- subject = value
- elif name == "from":
- sender = value
- elif name == "date":
- date_str = value
-
- # Format to markdown using the full message data
- markdown_content = composio_connector.format_gmail_message_to_markdown(message)
-
- # Generate unique identifier
- document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"])
- unique_identifier_hash = generate_unique_identifier_hash(
- document_type, f"gmail_{message_id}", search_space_id
- )
-
- content_hash = generate_content_hash(markdown_content, search_space_id)
-
- existing_document = await check_document_by_unique_identifier(
- session, unique_identifier_hash
- )
-
- # Get label IDs from Composio response
- label_ids = message.get("labelIds", [])
-
- if existing_document:
- if existing_document.content_hash == content_hash:
- documents_skipped += 1
- continue
-
- # Update existing
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
-
- if user_llm:
- document_metadata = {
- "message_id": message_id,
- "subject": subject,
- "sender": sender,
- "document_type": "Gmail Message (Composio)",
- }
- summary_content, summary_embedding = await generate_document_summary(
- markdown_content, user_llm, document_metadata
- )
- else:
- summary_content = f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
- summary_embedding = config.embedding_model_instance.embed(summary_content)
-
- chunks = await create_document_chunks(markdown_content)
-
- existing_document.title = f"Gmail: {subject}"
- existing_document.content = summary_content
- existing_document.content_hash = content_hash
- existing_document.embedding = summary_embedding
- existing_document.document_metadata = {
- "message_id": message_id,
- "subject": subject,
- "sender": sender,
- "date": date_str,
- "labels": label_ids,
- "connector_id": connector_id,
- "source": "composio",
- }
- existing_document.chunks = chunks
- existing_document.updated_at = get_current_timestamp()
-
- documents_indexed += 1
- continue
-
- # Create new document
+ # Update existing
user_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
@@ -609,6 +569,7 @@ async def _index_composio_gmail(
if user_llm:
document_metadata = {
"message_id": message_id,
+ "thread_id": thread_id,
"subject": subject,
"sender": sender,
"document_type": "Gmail Message (Composio)",
@@ -622,53 +583,276 @@ async def _index_composio_gmail(
chunks = await create_document_chunks(markdown_content)
- document = Document(
- search_space_id=search_space_id,
- title=f"Gmail: {subject}",
- document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
- document_metadata={
- "message_id": message_id,
- "subject": subject,
- "sender": sender,
- "date": date_str,
- "labels": label_ids,
- "connector_id": connector_id,
- "toolkit_id": "gmail",
- "source": "composio",
- },
- content=summary_content,
- content_hash=content_hash,
- unique_identifier_hash=unique_identifier_hash,
- embedding=summary_embedding,
- chunks=chunks,
- updated_at=get_current_timestamp(),
- )
- session.add(document)
+ existing_document.title = f"Gmail: {subject}"
+ existing_document.content = summary_content
+ existing_document.content_hash = content_hash
+ existing_document.embedding = summary_embedding
+ existing_document.document_metadata = {
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "date": date_str,
+ "labels": label_ids,
+ "connector_id": connector_id,
+ "source": "composio",
+ }
+ existing_document.chunks = chunks
+ existing_document.updated_at = get_current_timestamp()
+
documents_indexed += 1
-
- if documents_indexed % 10 == 0:
+
+ # Batch commit every 10 documents
+ current_total = total_documents_indexed + documents_indexed
+ if current_total % 10 == 0:
+ logger.info(
+ f"Committing batch: {current_total} Gmail messages processed so far"
+ )
await session.commit()
-
- except Exception as e:
- logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
- documents_skipped += 1
continue
- if documents_indexed > 0:
- await update_connector_last_indexed(session, connector, update_last_indexed)
+ # Create new document
+ user_llm = await get_user_long_context_llm(
+ session, user_id, search_space_id
+ )
+ if user_llm:
+ document_metadata = {
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "document_type": "Gmail Message (Composio)",
+ }
+ summary_content, summary_embedding = await generate_document_summary(
+ markdown_content, user_llm, document_metadata
+ )
+ else:
+ summary_content = f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
+ summary_embedding = config.embedding_model_instance.embed(summary_content)
+
+ chunks = await create_document_chunks(markdown_content)
+
+ document = Document(
+ search_space_id=search_space_id,
+ title=f"Gmail: {subject}",
+ document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
+ document_metadata={
+ "message_id": message_id,
+ "thread_id": thread_id,
+ "subject": subject,
+ "sender": sender,
+ "date": date_str,
+ "labels": label_ids,
+ "connector_id": connector_id,
+ "toolkit_id": "gmail",
+ "source": "composio",
+ },
+ content=summary_content,
+ content_hash=content_hash,
+ unique_identifier_hash=unique_identifier_hash,
+ embedding=summary_embedding,
+ chunks=chunks,
+ updated_at=get_current_timestamp(),
+ )
+ session.add(document)
+ documents_indexed += 1
+
+ # Batch commit every 10 documents
+ current_total = total_documents_indexed + documents_indexed
+ if current_total % 10 == 0:
+ logger.info(
+ f"Committing batch: {current_total} Gmail messages processed so far"
+ )
+ await session.commit()
+
+ except Exception as e:
+ logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
+ documents_skipped += 1
+ # Rollback on error to avoid partial state (per Composio best practices)
+ try:
+ await session.rollback()
+ except Exception as rollback_error:
+ logger.error(f"Error during rollback: {rollback_error!s}", exc_info=True)
+ continue
+
+ return documents_indexed, documents_skipped
+
+
+async def _index_composio_gmail(
+ session: AsyncSession,
+ connector,
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ start_date: str | None,
+ end_date: str | None,
+ task_logger: TaskLoggingService,
+ log_entry,
+ update_last_indexed: bool = True,
+ max_items: int = 1000,
+) -> tuple[int, str]:
+ """Index Gmail messages via Composio with pagination and incremental processing."""
+ try:
+ composio_connector = ComposioConnector(session, connector_id)
+
+ # Normalize date values - handle "undefined" strings from frontend
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
+
+ # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+ # This ensures indexing works even when user doesn't specify dates
+ start_date_str, end_date_str = calculate_date_range(
+ connector, start_date, end_date, default_days_back=365
+ )
+
+ # Build query with date range
+ query_parts = []
+ if start_date_str:
+ query_parts.append(f"after:{start_date_str.replace('-', '/')}")
+ if end_date_str:
+ query_parts.append(f"before:{end_date_str.replace('-', '/')}")
+ query = " ".join(query_parts) if query_parts else ""
+
+ logger.info(
+ f"Gmail query for connector {connector_id}: '{query}' "
+ f"(start_date={start_date_str}, end_date={end_date_str})"
+ )
+
+ # Use smaller batch size to avoid 413 payload too large errors
+ batch_size = 50
+ page_token = None
+ total_documents_indexed = 0
+ total_documents_skipped = 0
+ total_messages_fetched = 0
+ result_size_estimate = None # Will be set from first API response
+
+ while total_messages_fetched < max_items:
+ # Calculate how many messages to fetch in this batch
+ remaining = max_items - total_messages_fetched
+ current_batch_size = min(batch_size, remaining)
+
+ # Use result_size_estimate if available, otherwise fall back to max_items
+ estimated_total = result_size_estimate if result_size_estimate is not None else max_items
+ # Cap estimated_total at max_items to avoid showing misleading progress
+ estimated_total = min(estimated_total, max_items)
+
+ await task_logger.log_task_progress(
+ log_entry,
+ f"Fetching Gmail messages batch via Composio for connector {connector_id} "
+ f"({total_messages_fetched}/{estimated_total} fetched, {total_documents_indexed} indexed)",
+ {
+ "stage": "fetching_messages",
+ "batch_size": current_batch_size,
+ "total_fetched": total_messages_fetched,
+ "total_indexed": total_documents_indexed,
+ "estimated_total": estimated_total,
+ },
+ )
+
+ # Fetch batch of messages
+ messages, next_token, result_size_estimate_batch, error = await composio_connector.list_gmail_messages(
+ query=query,
+ max_results=current_batch_size,
+ page_token=page_token,
+ )
+
+ if error:
+ await task_logger.log_task_failure(
+ log_entry, f"Failed to fetch Gmail messages: {error}", {}
+ )
+ return 0, f"Failed to fetch Gmail messages: {error}"
+
+ if not messages:
+ # No more messages available
+ break
+
+ # Update result_size_estimate from first response (Gmail provides this estimate)
+ if result_size_estimate is None and result_size_estimate_batch is not None:
+ result_size_estimate = result_size_estimate_batch
+ logger.info(f"Gmail API estimated {result_size_estimate} total messages for query: '{query}'")
+
+ total_messages_fetched += len(messages)
+ # Recalculate estimated_total after potentially updating result_size_estimate
+ estimated_total = result_size_estimate if result_size_estimate is not None else max_items
+ estimated_total = min(estimated_total, max_items)
+
+ logger.info(
+ f"Fetched batch of {len(messages)} Gmail messages "
+ f"(total: {total_messages_fetched}/{estimated_total})"
+ )
+
+ # Process batch incrementally
+ batch_indexed, batch_skipped = await _process_gmail_message_batch(
+ session=session,
+ messages=messages,
+ composio_connector=composio_connector,
+ connector_id=connector_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ total_documents_indexed=total_documents_indexed,
+ )
+
+ total_documents_indexed += batch_indexed
+ total_documents_skipped += batch_skipped
+
+ logger.info(
+ f"Processed batch: {batch_indexed} indexed, {batch_skipped} skipped "
+ f"(total: {total_documents_indexed} indexed, {total_documents_skipped} skipped)"
+ )
+
+ # Batch commits happen in _process_gmail_message_batch every 10 documents
+ # This ensures progress is saved incrementally, preventing data loss on crashes
+
+ # Check if we should continue
+ if not next_token:
+ # No more pages available
+ break
+
+ if len(messages) < current_batch_size:
+ # Last page had fewer items than requested, we're done
+ break
+
+ # Continue with next page
+ page_token = next_token
+
+ if total_messages_fetched == 0:
+ success_msg = "No Gmail messages found in the specified date range"
+ await task_logger.log_task_success(
+ log_entry, success_msg, {"messages_count": 0}
+ )
+ # CRITICAL: Update timestamp even when no messages found so Electric SQL syncs and UI shows indexed status
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ await session.commit()
+ return 0, None # Return None (not error) when no items found
+
+ # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+ # This ensures the UI shows "Last indexed" instead of "Never indexed"
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+
+ # Final commit to ensure all documents are persisted (safety net)
+ # This matches the pattern used in non-Composio Gmail indexer
+ logger.info(
+ f"Final commit: Total {total_documents_indexed} Gmail messages processed"
+ )
await session.commit()
+ logger.info(
+ "Successfully committed all Composio Gmail document changes to database"
+ )
await task_logger.log_task_success(
log_entry,
f"Successfully completed Gmail indexing via Composio for connector {connector_id}",
{
- "documents_indexed": documents_indexed,
- "documents_skipped": documents_skipped,
+ "documents_indexed": total_documents_indexed,
+ "documents_skipped": total_documents_skipped,
+ "messages_fetched": total_messages_fetched,
},
)
- return documents_indexed, None
+ return total_documents_indexed, None
except Exception as e:
logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
@@ -689,8 +873,6 @@ async def _index_composio_google_calendar(
max_items: int = 2500,
) -> tuple[int, str]:
"""Index Google Calendar events via Composio."""
- from datetime import datetime, timedelta
-
try:
composio_connector = ComposioConnector(session, connector_id)
@@ -700,18 +882,26 @@ async def _index_composio_google_calendar(
{"stage": "fetching_events"},
)
- # Build time range
- if start_date:
- time_min = f"{start_date}T00:00:00Z"
- else:
- # Default to 365 days ago
- default_start = datetime.now() - timedelta(days=365)
- time_min = default_start.strftime("%Y-%m-%dT00:00:00Z")
+ # Normalize date values - handle "undefined" strings from frontend
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
- if end_date:
- time_max = f"{end_date}T23:59:59Z"
- else:
- time_max = datetime.now().strftime("%Y-%m-%dT23:59:59Z")
+ # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+ # This ensures indexing works even when user doesn't specify dates
+ start_date_str, end_date_str = calculate_date_range(
+ connector, start_date, end_date, default_days_back=365
+ )
+
+ # Build time range for API call
+ time_min = f"{start_date_str}T00:00:00Z"
+ time_max = f"{end_date_str}T23:59:59Z"
+
+ logger.info(
+ f"Google Calendar query for connector {connector_id}: "
+ f"(start_date={start_date_str}, end_date={end_date_str})"
+ )
events, error = await composio_connector.list_calendar_events(
time_min=time_min,
@@ -730,6 +920,9 @@ async def _index_composio_google_calendar(
await task_logger.log_task_success(
log_entry, success_msg, {"events_count": 0}
)
+ # CRITICAL: Update timestamp even when no events found so Electric SQL syncs and UI shows indexed status
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ await session.commit()
return 0, None # Return None (not error) when no items found - this is success with 0 items
logger.info(f"Found {len(events)} Google Calendar events to index via Composio")
@@ -814,6 +1007,13 @@ async def _index_composio_google_calendar(
existing_document.updated_at = get_current_timestamp()
documents_indexed += 1
+
+ # Batch commit every 10 documents
+ if documents_indexed % 10 == 0:
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Calendar events processed so far"
+ )
+ await session.commit()
continue
# Create new document
@@ -863,7 +1063,11 @@ async def _index_composio_google_calendar(
session.add(document)
documents_indexed += 1
+ # Batch commit every 10 documents
if documents_indexed % 10 == 0:
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Calendar events processed so far"
+ )
await session.commit()
except Exception as e:
@@ -871,10 +1075,19 @@ async def _index_composio_google_calendar(
documents_skipped += 1
continue
- if documents_indexed > 0:
- await update_connector_last_indexed(session, connector, update_last_indexed)
+ # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+ # This ensures the UI shows "Last indexed" instead of "Never indexed"
+ await update_connector_last_indexed(session, connector, update_last_indexed)
+ # Final commit to ensure all documents are persisted (safety net)
+ # This mirrors the final-commit pattern used by the non-Composio indexers
+ logger.info(
+ f"Final commit: Total {documents_indexed} Google Calendar events processed"
+ )
await session.commit()
+ logger.info(
+ "Successfully committed all Composio Google Calendar document changes to database"
+ )
await task_logger.log_task_success(
log_entry,
From e6a4ac7c9cd14c3bcae4bbeb91b7b58abd538b80 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 04:56:15 +0530
Subject: [PATCH 04/21] fix: change animation from spring to tween for sliding
---
.../components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx | 2 +-
.../components/layout/ui/sidebar/AllSharedChatsSidebar.tsx | 2 +-
surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx
index 39f1b95bc..c094ff44a 100644
--- a/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx
@@ -231,7 +231,7 @@ export function AllPrivateChatsSidebar({
initial={{ x: "-100%" }}
animate={{ x: 0 }}
exit={{ x: "-100%" }}
- transition={{ type: "spring", damping: 25, stiffness: 300 }}
+ transition={{ type: "tween", duration: 0.3, ease: "easeOut" }}
className="fixed inset-y-0 left-0 z-70 w-80 bg-background shadow-xl flex flex-col pointer-events-auto isolate"
role="dialog"
aria-modal="true"
diff --git a/surfsense_web/components/layout/ui/sidebar/AllSharedChatsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/AllSharedChatsSidebar.tsx
index 8dd593945..76dbf1aad 100644
--- a/surfsense_web/components/layout/ui/sidebar/AllSharedChatsSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/AllSharedChatsSidebar.tsx
@@ -231,7 +231,7 @@ export function AllSharedChatsSidebar({
initial={{ x: "-100%" }}
animate={{ x: 0 }}
exit={{ x: "-100%" }}
- transition={{ type: "spring", damping: 25, stiffness: 300 }}
+ transition={{ type: "tween", duration: 0.3, ease: "easeOut" }}
className="fixed inset-y-0 left-0 z-70 w-80 bg-background shadow-xl flex flex-col pointer-events-auto isolate"
role="dialog"
aria-modal="true"
diff --git a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
index 166d77eca..a3fd3ea14 100644
--- a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
@@ -446,7 +446,7 @@ export function InboxSidebar({
initial={{ x: "-100%" }}
animate={{ x: 0 }}
exit={{ x: "-100%" }}
- transition={{ type: "spring", damping: 25, stiffness: 300 }}
+ transition={{ type: "tween", duration: 0.3, ease: "easeOut" }}
className="fixed inset-y-0 left-0 z-70 w-90 bg-background shadow-xl flex flex-col pointer-events-auto isolate"
role="dialog"
aria-modal="true"
From 7ec7ed5c3b6dde85127e8809d7c07c47fe62fd87 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 05:17:28 +0530
Subject: [PATCH 05/21] feat: enhance Composio Google Drive integration with
folder and file selection
- Added a new endpoint to list folders and files in a user's Composio Google Drive, supporting hierarchical structure.
- Implemented UI components for selecting specific folders and files to index, improving user control over indexing options.
- Introduced indexing options for maximum files per folder and inclusion of subfolders, allowing for customizable indexing behavior.
- Enhanced error handling and logging for Composio Drive operations, ensuring better visibility into issues during file retrieval and indexing.
- Updated the Composio configuration component to reflect new selection capabilities and indexing options.
---
.../app/routes/composio_routes.py | 122 ++++++
.../routes/search_source_connectors_routes.py | 40 +-
.../app/services/composio_service.py | 6 +-
.../app/tasks/composio_indexer.py | 195 +++++++++-
.../components/composio-config.tsx | 294 +++++++++++++-
.../views/connector-edit-view.tsx | 7 +-
.../hooks/use-connector-dialog.ts | 8 +-
.../connectors/composio-drive-folder-tree.tsx | 365 ++++++++++++++++++
.../hooks/use-composio-drive-folders.ts | 29 ++
.../lib/apis/connectors-api.service.ts | 23 ++
surfsense_web/lib/query-client/cache-keys.ts | 4 +
11 files changed, 1069 insertions(+), 24 deletions(-)
create mode 100644 surfsense_web/components/connectors/composio-drive-folder-tree.tsx
create mode 100644 surfsense_web/hooks/use-composio-drive-folders.ts
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index 77891fc88..25e545dfb 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -8,6 +8,7 @@ Endpoints:
- GET /composio/toolkits - List available Composio toolkits
- GET /auth/composio/connector/add - Initiate OAuth for a specific toolkit
- GET /auth/composio/connector/callback - Handle OAuth callback
+- GET /connectors/{connector_id}/composio-drive/folders - List folders/files for Composio Google Drive
"""
import asyncio
@@ -369,3 +370,124 @@ async def composio_callback(
raise HTTPException(
status_code=500, detail=f"Failed to complete Composio OAuth: {e!s}"
) from e
+
+
+@router.get("/connectors/{connector_id}/composio-drive/folders")
+async def list_composio_drive_folders(
+ connector_id: int,
+ parent_id: str | None = None,
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+):
+ """
+ List folders AND files in user's Google Drive via Composio with hierarchical support.
+
+ This is called at index time from the manage connector page to display
+ the complete file system (folders and files). Both folders and files can be selected for indexing.
+
+ Args:
+ connector_id: ID of the Composio Google Drive connector
+ parent_id: Optional parent folder ID to list contents (None for root)
+
+ Returns:
+ JSON with list of items: {
+ "items": [
+ {"id": str, "name": str, "mimeType": str, "isFolder": bool, ...},
+ ...
+ ]
+ }
+ """
+ if not ComposioService.is_enabled():
+ raise HTTPException(
+ status_code=503,
+ detail="Composio integration is not enabled.",
+ )
+
+ try:
+ # Get connector and verify ownership
+ result = await session.execute(
+ select(SearchSourceConnector).filter(
+ SearchSourceConnector.id == connector_id,
+ SearchSourceConnector.user_id == user.id,
+ SearchSourceConnector.connector_type
+ == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+ )
+ )
+ connector = result.scalars().first()
+
+ if not connector:
+ raise HTTPException(
+ status_code=404,
+ detail="Composio Google Drive connector not found or access denied",
+ )
+
+ # Get Composio connected account ID from config
+ composio_connected_account_id = connector.config.get("composio_connected_account_id")
+ if not composio_connected_account_id:
+ raise HTTPException(
+ status_code=400,
+ detail="Composio connected account not found. Please reconnect the connector.",
+ )
+
+ # Initialize Composio service and fetch files
+ service = ComposioService()
+ entity_id = f"surfsense_{user.id}"
+
+ # Fetch files/folders from Composio Google Drive
+ files, next_token, error = await service.get_drive_files(
+ connected_account_id=composio_connected_account_id,
+ entity_id=entity_id,
+ folder_id=parent_id,
+ page_size=100,
+ )
+
+ if error:
+ logger.error(f"Failed to list Composio Drive files: {error}")
+ raise HTTPException(
+ status_code=500, detail=f"Failed to list folder contents: {error}"
+ )
+
+ # Transform files to match the expected format with isFolder field
+ items = []
+ for file_info in files:
+ file_id = file_info.get("id", "") or file_info.get("fileId", "")
+ file_name = file_info.get("name", "") or file_info.get("fileName", "") or "Untitled"
+ mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
+
+ if not file_id:
+ continue
+
+ is_folder = mime_type == "application/vnd.google-apps.folder"
+
+ items.append({
+ "id": file_id,
+ "name": file_name,
+ "mimeType": mime_type,
+ "isFolder": is_folder,
+ "parents": file_info.get("parents", []),
+ "size": file_info.get("size"),
+ "iconLink": file_info.get("iconLink"),
+ })
+
+ # Sort: folders first, then files, both alphabetically
+ folders = sorted([item for item in items if item["isFolder"]], key=lambda x: x["name"].lower())
+ files_list = sorted([item for item in items if not item["isFolder"]], key=lambda x: x["name"].lower())
+ items = folders + files_list
+
+ folder_count = len(folders)
+ file_count = len(files_list)
+
+ logger.info(
+ f"✅ Listed {len(items)} total items ({folder_count} folders, {file_count} files) for Composio connector {connector_id}"
+ + (f" in folder {parent_id}" if parent_id else " in ROOT")
+ )
+
+ return {"items": items}
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error listing Composio Drive contents: {e!s}", exc_info=True)
+ raise HTTPException(
+ status_code=500, detail=f"Failed to list Drive contents: {e!s}"
+ ) from e
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 1578ad0d5..89cdd9f95 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -897,8 +897,46 @@ async def index_connector_content(
)
response_message = "Web page indexing started in the background."
+ elif connector.connector_type == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR:
+ from app.tasks.celery_tasks.connector_tasks import (
+ index_composio_connector_task,
+ )
+
+ # For Composio Google Drive, if drive_items is provided, update connector config
+ # This allows the UI to pass folder/file selection like the regular Google Drive connector
+ if drive_items and drive_items.has_items():
+ # Update connector config with the selected folders/files
+ config = connector.config or {}
+ config["selected_folders"] = [{"id": f.id, "name": f.name} for f in drive_items.folders]
+ config["selected_files"] = [{"id": f.id, "name": f.name} for f in drive_items.files]
+ if drive_items.indexing_options:
+ config["indexing_options"] = {
+ "max_files_per_folder": drive_items.indexing_options.max_files_per_folder,
+ "incremental_sync": drive_items.indexing_options.incremental_sync,
+ "include_subfolders": drive_items.indexing_options.include_subfolders,
+ }
+ connector.config = config
+ from sqlalchemy.orm.attributes import flag_modified
+ flag_modified(connector, "config")
+ await session.commit()
+ await session.refresh(connector)
+
+ logger.info(
+ f"Triggering Composio Google Drive indexing for connector {connector_id} into search space {search_space_id}, "
+ f"folders: {len(drive_items.folders)}, files: {len(drive_items.files)}"
+ )
+ else:
+ logger.info(
+ f"Triggering Composio Google Drive indexing for connector {connector_id} into search space {search_space_id} "
+ f"using existing config (from {indexing_from} to {indexing_to})"
+ )
+
+ index_composio_connector_task.delay(
+ connector_id, search_space_id, str(user.id), indexing_from, indexing_to
+ )
+ response_message = "Composio Google Drive indexing started in the background."
+
elif connector.connector_type in [
- SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
]:
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index e32cbf8a0..5a6148533 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -397,7 +397,11 @@ class ComposioService:
"page_size": min(page_size, 100),
}
if folder_id:
- params["folder_id"] = folder_id
+ # List contents of a specific folder (exclude shortcuts - we don't have access to them)
+ params["q"] = f"'{folder_id}' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
+ else:
+ # List root-level items only (My Drive root), exclude shortcuts
+ params["q"] = "'root' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
if page_token:
params["page_token"] = page_token
diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py
index c9cd74234..f568d4134 100644
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@@ -252,37 +252,123 @@ async def _index_composio_google_drive(
update_last_indexed: bool = True,
max_items: int = 1000,
) -> tuple[int, str]:
- """Index Google Drive files via Composio."""
+ """Index Google Drive files via Composio.
+
+ Supports folder/file selection via connector config:
+ - selected_folders: List of {id, name} for folders to index
+ - selected_files: List of {id, name} for individual files to index
+ - indexing_options: {max_files_per_folder, incremental_sync, include_subfolders}
+ """
try:
composio_connector = ComposioConnector(session, connector_id)
+ connector_config = await composio_connector.get_config()
+
+ # Get folder/file selection configuration
+ selected_folders = connector_config.get("selected_folders", [])
+ selected_files = connector_config.get("selected_files", [])
+ indexing_options = connector_config.get("indexing_options", {})
+
+ max_files_per_folder = indexing_options.get("max_files_per_folder", 100)
+ include_subfolders = indexing_options.get("include_subfolders", True)
await task_logger.log_task_progress(
log_entry,
f"Fetching Google Drive files via Composio for connector {connector_id}",
- {"stage": "fetching_files"},
+ {"stage": "fetching_files", "selected_folders": len(selected_folders), "selected_files": len(selected_files)},
)
- # Fetch files
all_files = []
- page_token = None
- while len(all_files) < max_items:
- files, next_token, error = await composio_connector.list_drive_files(
- page_token=page_token,
- page_size=min(100, max_items - len(all_files)),
- )
+ # If specific folders/files are selected, fetch from those
+ if selected_folders or selected_files:
+ # Fetch files from selected folders
+ for folder in selected_folders:
+ folder_id = folder.get("id")
+ folder_name = folder.get("name", "Unknown")
+
+ if not folder_id:
+ continue
+
+ # Handle special case for "root" folder
+ actual_folder_id = None if folder_id == "root" else folder_id
+
+ logger.info(f"Fetching files from folder: {folder_name} ({folder_id})")
+
+ # Fetch files from this folder
+ folder_files = []
+ page_token = None
+
+ while len(folder_files) < max_files_per_folder:
+ files, next_token, error = await composio_connector.list_drive_files(
+ folder_id=actual_folder_id,
+ page_token=page_token,
+ page_size=min(100, max_files_per_folder - len(folder_files)),
+ )
- if error:
- await task_logger.log_task_failure(
- log_entry, f"Failed to fetch Drive files: {error}", {}
+ if error:
+ logger.warning(f"Failed to fetch files from folder {folder_name}: {error}")
+ break
+
+ # Process files
+ for file_info in files:
+ mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
+
+ # If it's a folder and include_subfolders is enabled, recursively fetch
+ if mime_type == "application/vnd.google-apps.folder":
+ if include_subfolders:
+ # Add subfolder files recursively
+ subfolder_files = await _fetch_folder_files_recursively(
+ composio_connector,
+ file_info.get("id"),
+ max_files=max_files_per_folder,
+ current_count=len(folder_files),
+ )
+ folder_files.extend(subfolder_files)
+ else:
+ folder_files.append(file_info)
+
+ if not next_token:
+ break
+ page_token = next_token
+
+ all_files.extend(folder_files[:max_files_per_folder])
+ logger.info(f"Found {len(folder_files)} files in folder {folder_name}")
+
+ # Add specifically selected files
+ for selected_file in selected_files:
+ file_id = selected_file.get("id")
+ file_name = selected_file.get("name", "Unknown")
+
+ if not file_id:
+ continue
+
+ # Add file info (we'll fetch content later during indexing)
+ all_files.append({
+ "id": file_id,
+ "name": file_name,
+ "mimeType": "", # Will be determined later
+ })
+ else:
+ # No selection specified - fetch all files (original behavior)
+ page_token = None
+
+ while len(all_files) < max_items:
+ files, next_token, error = await composio_connector.list_drive_files(
+ page_token=page_token,
+ page_size=min(100, max_items - len(all_files)),
)
- return 0, f"Failed to fetch Drive files: {error}"
- all_files.extend(files)
+ if error:
+ await task_logger.log_task_failure(
+ log_entry, f"Failed to fetch Drive files: {error}", {}
+ )
+ return 0, f"Failed to fetch Drive files: {error}"
- if not next_token:
- break
- page_token = next_token
+ all_files.extend(files)
+
+ if not next_token:
+ break
+ page_token = next_token
if not all_files:
success_msg = "No Google Drive files found"
@@ -479,6 +565,81 @@ async def _index_composio_google_drive(
return 0, f"Failed to index Google Drive via Composio: {e!s}"
+async def _fetch_folder_files_recursively(
+ composio_connector: ComposioConnector,
+ folder_id: str,
+ max_files: int = 100,
+ current_count: int = 0,
+ depth: int = 0,
+ max_depth: int = 10,
+) -> list[dict[str, Any]]:
+ """
+ Recursively fetch files from a Google Drive folder via Composio.
+
+ Args:
+ composio_connector: The Composio connector instance
+ folder_id: Google Drive folder ID
+ max_files: Overall cap on the number of files to collect, counting those already in current_count
+ current_count: Number of files the caller has already collected toward max_files
+ depth: Current recursion depth
+ max_depth: Maximum recursion depth to prevent infinite loops
+
+ Returns:
+ List of file info dictionaries
+ """
+ if depth >= max_depth:
+ logger.warning(f"Max recursion depth reached for folder {folder_id}")
+ return []
+
+ if current_count >= max_files:
+ return []
+
+ all_files = []
+ page_token = None
+
+ try:
+ while len(all_files) + current_count < max_files:
+ files, next_token, error = await composio_connector.list_drive_files(
+ folder_id=folder_id,
+ page_token=page_token,
+ page_size=min(100, max_files - len(all_files) - current_count),
+ )
+
+ if error:
+ logger.warning(f"Error fetching files from subfolder {folder_id}: {error}")
+ break
+
+ for file_info in files:
+ mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
+
+ if mime_type == "application/vnd.google-apps.folder":
+ # Recursively fetch from subfolders
+ subfolder_files = await _fetch_folder_files_recursively(
+ composio_connector,
+ file_info.get("id"),
+ max_files=max_files,
+ current_count=current_count + len(all_files),
+ depth=depth + 1,
+ max_depth=max_depth,
+ )
+ all_files.extend(subfolder_files)
+ else:
+ all_files.append(file_info)
+
+ if len(all_files) + current_count >= max_files:
+ break
+
+ if not next_token:
+ break
+ page_token = next_token
+
+ return all_files[:max_files - current_count]
+
+ except Exception as e:
+ logger.error(f"Error in recursive folder fetch: {e!s}")
+ return all_files
+
+
async def _process_gmail_message_batch(
session: AsyncSession,
messages: list[dict[str, Any]],
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
index a96f906fe..255d0cef4 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-config.tsx
@@ -1,7 +1,20 @@
"use client";
+import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation } from "lucide-react";
import type { FC } from "react";
+import { useEffect, useState } from "react";
+import { ComposioDriveFolderTree } from "@/components/connectors/composio-drive-folder-tree";
import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Label } from "@/components/ui/label";
+import {
+ Select,
+ SelectContent,
+ SelectItem,
+ SelectTrigger,
+ SelectValue,
+} from "@/components/ui/select";
+import { Switch } from "@/components/ui/switch";
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
import { cn } from "@/lib/utils";
@@ -11,11 +24,134 @@ interface ComposioConfigProps {
onNameChange?: (name: string) => void;
}
-export const ComposioConfig: FC = ({ connector }) => {
+interface SelectedFolder {
+ id: string;
+ name: string;
+}
+
+interface IndexingOptions {
+ max_files_per_folder: number;
+ incremental_sync: boolean;
+ include_subfolders: boolean;
+}
+
+const DEFAULT_INDEXING_OPTIONS: IndexingOptions = {
+ max_files_per_folder: 100,
+ incremental_sync: true,
+ include_subfolders: true,
+};
+
+// Helper to get appropriate icon for file type based on file name
+function getFileIconFromName(fileName: string, className: string = "size-3.5 shrink-0") {
+ const lowerName = fileName.toLowerCase();
+ // Spreadsheets
+ if (
+ lowerName.endsWith(".xlsx") ||
+ lowerName.endsWith(".xls") ||
+ lowerName.endsWith(".csv") ||
+ lowerName.includes("spreadsheet")
+ ) {
+ return ;
+ }
+ // Presentations
+ if (
+ lowerName.endsWith(".pptx") ||
+ lowerName.endsWith(".ppt") ||
+ lowerName.includes("presentation")
+ ) {
+ return ;
+ }
+ // Documents (word, text only - not PDF)
+ if (
+ lowerName.endsWith(".docx") ||
+ lowerName.endsWith(".doc") ||
+ lowerName.endsWith(".txt") ||
+ lowerName.includes("document") ||
+ lowerName.includes("word") ||
+ lowerName.includes("text")
+ ) {
+ return ;
+ }
+ // Images
+ if (
+ lowerName.endsWith(".png") ||
+ lowerName.endsWith(".jpg") ||
+ lowerName.endsWith(".jpeg") ||
+ lowerName.endsWith(".gif") ||
+ lowerName.endsWith(".webp") ||
+ lowerName.endsWith(".svg")
+ ) {
+ return ;
+ }
+ // Default (including PDF)
+ return ;
+}
+
+export const ComposioConfig: FC = ({ connector, onConfigChange }) => {
const toolkitId = connector.config?.toolkit_id as string;
const isIndexable = connector.config?.is_indexable as boolean;
const composioAccountId = connector.config?.composio_connected_account_id as string;
+ // Check if this is a Google Drive Composio connector
+ const isGoogleDrive = toolkitId === "googledrive";
+
+ // Initialize with existing selected folders and files from connector config
+ const existingFolders =
+ (connector.config?.selected_folders as SelectedFolder[] | undefined) || [];
+ const existingFiles = (connector.config?.selected_files as SelectedFolder[] | undefined) || [];
+ const existingIndexingOptions =
+ (connector.config?.indexing_options as IndexingOptions | undefined) || DEFAULT_INDEXING_OPTIONS;
+
+ const [selectedFolders, setSelectedFolders] = useState(existingFolders);
+ const [selectedFiles, setSelectedFiles] = useState(existingFiles);
+ const [showFolderSelector, setShowFolderSelector] = useState(false);
+ const [indexingOptions, setIndexingOptions] = useState(existingIndexingOptions);
+
+ // Update selected folders and files when connector config changes
+ useEffect(() => {
+ const folders = (connector.config?.selected_folders as SelectedFolder[] | undefined) || [];
+ const files = (connector.config?.selected_files as SelectedFolder[] | undefined) || [];
+ const options =
+ (connector.config?.indexing_options as IndexingOptions | undefined) ||
+ DEFAULT_INDEXING_OPTIONS;
+ setSelectedFolders(folders);
+ setSelectedFiles(files);
+ setIndexingOptions(options);
+ }, [connector.config]);
+
+ const updateConfig = (
+ folders: SelectedFolder[],
+ files: SelectedFolder[],
+ options: IndexingOptions
+ ) => {
+ if (onConfigChange) {
+ onConfigChange({
+ ...connector.config,
+ selected_folders: folders,
+ selected_files: files,
+ indexing_options: options,
+ });
+ }
+ };
+
+ const handleSelectFolders = (folders: SelectedFolder[]) => {
+ setSelectedFolders(folders);
+ updateConfig(folders, selectedFiles, indexingOptions);
+ };
+
+ const handleSelectFiles = (files: SelectedFolder[]) => {
+ setSelectedFiles(files);
+ updateConfig(selectedFolders, files, indexingOptions);
+ };
+
+ const handleIndexingOptionChange = (key: keyof IndexingOptions, value: number | boolean) => {
+ const newOptions = { ...indexingOptions, [key]: value };
+ setIndexingOptions(newOptions);
+ updateConfig(selectedFolders, selectedFiles, newOptions);
+ };
+
+ const totalSelected = selectedFolders.length + selectedFiles.length;
+
return (
Manage your connector settings and sync configuration
diff --git a/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx b/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx
index e45888bb1..2067ca9ad 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/tabs/active-connectors-tab.tsx
@@ -15,6 +15,7 @@ import { connectorsApiService } from "@/lib/apis/connectors-api.service";
import { cn } from "@/lib/utils";
import { COMPOSIO_CONNECTORS, OAUTH_CONNECTORS } from "../constants/connector-constants";
import { getDocumentCountForConnector } from "../utils/connector-document-mapping";
+import { getConnectorDisplayName } from "./all-connectors-tab";
interface ActiveConnectorsTabProps {
searchQuery: string;
@@ -263,8 +264,8 @@ export const ActiveConnectorsTab: FC = ({
-
- {connector.name}
+
+ {getConnectorDisplayName(connector.name)}
{isIndexing ? (
From 08f16b43d72edff44bcd4621a43cad79a61ed103 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 20:36:00 +0530
Subject: [PATCH 14/21] feat: enhance Composio connector naming logic and
improve UI focus
- Updated the Composio connector naming logic to dynamically generate user-friendly names based on existing connectors.
- Introduced new utility functions for counting connectors and retrieving base names for specific connector types.
- Enhanced the UI components to improve accessibility and focus management, ensuring a better user experience when interacting with connector dialogs.
---
.../app/routes/composio_routes.py | 27 +++++++++++--------
.../app/utils/connector_naming.py | 3 +++
.../assistant-ui/connector-popup.tsx | 2 +-
surfsense_web/components/ui/dialog.tsx | 2 +-
4 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index 9e9b59f82..14ef9efcf 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -35,7 +35,10 @@ from app.services.composio_service import (
ComposioService,
)
from app.users import current_active_user
-from app.utils.connector_naming import generate_unique_connector_name
+from app.utils.connector_naming import (
+ count_connectors_of_type,
+ get_base_name_for_type,
+)
from app.utils.oauth_security import OAuthStateManager
# Note: We no longer use check_duplicate_connector for Composio connectors because
@@ -343,17 +346,19 @@ async def composio_callback(
)
try:
- # Generate a unique, user-friendly connector name
- # Pass just toolkit_name (without "(Composio)") to avoid redundancy
- base_name = await generate_unique_connector_name(
- session,
- connector_type,
- space_id,
- user_id,
- toolkit_name,
+ # Count existing connectors of this type to pick the next sequence number
+ count = await count_connectors_of_type(
+ session, connector_type, space_id, user_id
)
- # Append "(Composio)" suffix for identification
- connector_name = f"{base_name} (Composio)"
+
+ # Generate base name (e.g., "Gmail", "Google Drive")
+ base_name = get_base_name_for_type(connector_type)
+
+ # Format: "Gmail (Composio) 1", "Gmail (Composio) 2", etc.
+ if count == 0:
+ connector_name = f"{base_name} (Composio) 1"
+ else:
+ connector_name = f"{base_name} (Composio) {count + 1}"
db_connector = SearchSourceConnector(
name=connector_name,
diff --git a/surfsense_backend/app/utils/connector_naming.py b/surfsense_backend/app/utils/connector_naming.py
index a2b748a3a..7d3efc001 100644
--- a/surfsense_backend/app/utils/connector_naming.py
+++ b/surfsense_backend/app/utils/connector_naming.py
@@ -28,6 +28,9 @@ BASE_NAME_FOR_TYPE = {
SearchSourceConnectorType.CONFLUENCE_CONNECTOR: "Confluence",
SearchSourceConnectorType.AIRTABLE_CONNECTOR: "Airtable",
SearchSourceConnectorType.MCP_CONNECTOR: "Model Context Protocol (MCP)",
+ SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR: "Gmail",
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: "Google Drive",
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: "Google Calendar",
}
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index 1ec8fad73..e656c06d6 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -184,7 +184,7 @@ export const ConnectorIndicator: FC = () => {
)}
-
+ Manage Connectors
{/* YouTube Crawler View - shown when adding YouTube videos */}
{isYouTubeView && searchSpaceId ? (
diff --git a/surfsense_web/components/ui/dialog.tsx b/surfsense_web/components/ui/dialog.tsx
index d04d76520..f3fa856d3 100644
--- a/surfsense_web/components/ui/dialog.tsx
+++ b/surfsense_web/components/ui/dialog.tsx
@@ -38,7 +38,7 @@ const DialogContent = React.forwardRef<
Date: Fri, 23 Jan 2026 23:03:29 +0530
Subject: [PATCH 15/21] feat: enhance date handling and indexing logic across
connectors
- Added normalization for "undefined" strings to None in date parameters to prevent parsing errors.
- Improved date range validation to ensure start_date is strictly before end_date, adjusting end_date if necessary.
- Updated Google Calendar and Composio connector indexing logic to handle duplicate content more effectively, logging warnings for skipped events.
- Enhanced error handling during final commits to manage integrity errors gracefully.
- Refactored date handling in various connector indexers for consistency and reliability.
---
.../app/connectors/google_gmail_connector.py | 7 +++++
.../routes/search_source_connectors_routes.py | 26 ++++++++++++++-----
.../app/tasks/connector_indexers/base.py | 7 +++++
.../google_calendar_indexer.py | 19 ++++++++++++++
.../tasks/connector_indexers/luma_indexer.py | 7 +++++
.../assistant-ui/connector-popup.tsx | 8 +++++-
.../views/connector-edit-view.tsx | 3 +--
.../views/indexing-configuration-view.tsx | 3 +--
.../hooks/use-connector-dialog.ts | 16 ++++++++++--
9 files changed, 83 insertions(+), 13 deletions(-)
diff --git a/surfsense_backend/app/connectors/google_gmail_connector.py b/surfsense_backend/app/connectors/google_gmail_connector.py
index 8c0e4690e..c86a96413 100644
--- a/surfsense_backend/app/connectors/google_gmail_connector.py
+++ b/surfsense_backend/app/connectors/google_gmail_connector.py
@@ -285,6 +285,13 @@ class GoogleGmailConnector:
try:
from datetime import datetime, timedelta
+ # Normalize date values - handle "undefined" or empty strings from frontend
+ # This prevents "time data 'undefined' does not match format" errors
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
+
# Build date query
query_parts = []
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 82f452c61..928327d9a 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -644,20 +644,30 @@ async def index_connector_content(
# Handle different connector types
response_message = ""
- today_str = datetime.now().strftime("%Y-%m-%d")
+ # Use UTC for consistency with last_indexed_at storage
+ today_str = datetime.now(UTC).strftime("%Y-%m-%d")
# Determine the actual date range to use
if start_date is None:
# Use last_indexed_at or default to 365 days ago
if connector.last_indexed_at:
- today = datetime.now().date()
- if connector.last_indexed_at.date() == today:
+ # Convert last_indexed_at to timezone-naive for comparison (like calculate_date_range does)
+ last_indexed_naive = (
+ connector.last_indexed_at.replace(tzinfo=None)
+ if connector.last_indexed_at.tzinfo
+ else connector.last_indexed_at
+ )
+ # Use UTC for "today" to match how last_indexed_at is stored
+ today_utc = datetime.now(UTC).replace(tzinfo=None).date()
+ last_indexed_date = last_indexed_naive.date()
+
+ if last_indexed_date == today_utc:
# If last indexed today, go back 1 day to ensure we don't miss anything
- indexing_from = (today - timedelta(days=1)).strftime("%Y-%m-%d")
+ indexing_from = (today_utc - timedelta(days=1)).strftime("%Y-%m-%d")
else:
- indexing_from = connector.last_indexed_at.strftime("%Y-%m-%d")
+ indexing_from = last_indexed_naive.strftime("%Y-%m-%d")
else:
- indexing_from = (datetime.now() - timedelta(days=365)).strftime(
+ indexing_from = (datetime.now(UTC).replace(tzinfo=None) - timedelta(days=365)).strftime(
"%Y-%m-%d"
)
else:
@@ -666,6 +676,7 @@ async def index_connector_content(
# For calendar connectors, default to today but allow future dates if explicitly provided
if connector.connector_type in [
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
+ SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
SearchSourceConnectorType.LUMA_CONNECTOR,
]:
# Default to today if no end_date provided (users can manually select future dates)
@@ -977,6 +988,9 @@ async def index_connector_content(
index_composio_connector_task,
)
+ # For Composio Gmail and Calendar, use the same date calculation logic as normal connectors
+ # This ensures consistent behavior and uses last_indexed_at to reduce API calls
+ # (includes special case: if indexed today, go back 1 day to avoid missing data)
logger.info(
f"Triggering Composio connector indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
diff --git a/surfsense_backend/app/tasks/connector_indexers/base.py b/surfsense_backend/app/tasks/connector_indexers/base.py
index b9a99808e..b390937f0 100644
--- a/surfsense_backend/app/tasks/connector_indexers/base.py
+++ b/surfsense_backend/app/tasks/connector_indexers/base.py
@@ -112,6 +112,13 @@ def calculate_date_range(
Returns:
Tuple of (start_date_str, end_date_str)
"""
+ # Normalize "undefined" or empty strings to None (from frontend)
+ # This prevents parsing errors and ensures consistent behavior across all indexers
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
+
if start_date is not None and end_date is not None:
return start_date, end_date
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index 09bb8de4b..7787560fa 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -4,6 +4,8 @@ Google Calendar connector indexer.
from datetime import datetime, timedelta
+import pytz
+from dateutil.parser import isoparse
from google.oauth2.credentials import Credentials
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
@@ -205,6 +207,23 @@ async def index_google_calendar_events(
# Use provided dates (including future dates)
start_date_str = start_date
end_date_str = end_date
+
+ # If start_date and end_date are the same, adjust end_date to be one day later
+ # to ensure valid date range (start_date must be strictly before end_date)
+ if start_date_str == end_date_str:
+ # Parse the date and add one day to ensure valid range
+ dt = isoparse(end_date_str)
+ if dt.tzinfo is None:
+ dt = dt.replace(tzinfo=pytz.UTC)
+ else:
+ dt = dt.astimezone(pytz.UTC)
+ # Add one day to end_date to make it strictly after start_date
+ dt_end = dt + timedelta(days=1)
+ end_date_str = dt_end.strftime("%Y-%m-%d")
+ logger.info(
+ f"Adjusted end_date from {end_date} to {end_date_str} "
+ f"to ensure valid date range (start_date must be strictly before end_date)"
+ )
await task_logger.log_task_progress(
log_entry,
diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
index 91f81ac20..0d7a979be 100644
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@@ -116,6 +116,13 @@ async def index_luma_events(
luma_client = LumaConnector(api_key=api_key)
+ # Handle 'undefined' or empty string from frontend (treat as None)
+ # This prevents "time data 'undefined' does not match format" errors
+ if start_date == "undefined" or start_date == "":
+ start_date = None
+ if end_date == "undefined" or end_date == "":
+ end_date = None
+
# Calculate date range
# For calendar connectors, allow future dates to index upcoming events
if start_date is None or end_date is None:
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index e656c06d6..68a548409 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -259,7 +259,13 @@ export const ConnectorIndicator: FC = () => {
editingConnector.connector_type !== "GOOGLE_DRIVE_CONNECTOR"
? () => {
startIndexing(editingConnector.id);
- handleQuickIndexConnector(editingConnector.id, editingConnector.connector_type, stopIndexing);
+ handleQuickIndexConnector(
+ editingConnector.id,
+ editingConnector.connector_type,
+ stopIndexing,
+ startDate,
+ endDate
+ );
}
: undefined
}
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index 8951336c5..d12264fbd 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -272,8 +272,7 @@ export const ConnectorEditView: FC = ({
Re-indexing runs in the background
- You can continue using SurfSense while we sync your data. Check the Active tab
- to see progress.
+ You can continue using SurfSense while we sync your data. Check inbox for updates.
- You can continue using SurfSense while we sync your data. Check the Active tab
- to see progress.
+ You can continue using SurfSense while we sync your data. Check inbox for updates.
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 3e9e1d930..1bcbd4263 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -1400,9 +1400,15 @@ export const useConnectorDialog = () => {
[editingConnector, searchSpaceId, deleteConnector, router, cameFromMCPList]
);
- // Handle quick index (index without date picker, uses backend defaults)
+ // Handle quick index (index with selected date range, or backend defaults if none selected)
const handleQuickIndexConnector = useCallback(
- async (connectorId: number, connectorType?: string, stopIndexing?: (id: number) => void) => {
+ async (
+ connectorId: number,
+ connectorType?: string,
+ stopIndexing?: (id: number) => void,
+ startDate?: Date,
+ endDate?: Date
+ ) => {
if (!searchSpaceId) return;
// Track quick index clicked event
@@ -1411,10 +1417,16 @@ export const useConnectorDialog = () => {
}
try {
+ // Format dates if provided, otherwise pass undefined (backend will use defaults)
+ const startDateStr = startDate ? format(startDate, "yyyy-MM-dd") : undefined;
+ const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
+
await indexConnector({
connector_id: connectorId,
queryParams: {
search_space_id: searchSpaceId,
+ start_date: startDateStr,
+ end_date: endDateStr,
},
});
toast.success("Indexing started", {
From c48ba36fa47ccffb10f68a76231ab017321c5dbe Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 23 Jan 2026 23:36:14 +0530
Subject: [PATCH 16/21] feat: improve indexing logic and duplicate handling in
connectors
- Enhanced Google Calendar and Composio connector indexing to track and log duplicate content, preventing re-indexing of already processed events.
- Implemented robust error handling during final commits to manage integrity errors gracefully, ensuring successful indexing despite potential duplicates.
- Updated notification service to differentiate between actual errors and warnings for duplicate content, improving user feedback.
- Refactored date handling to ensure valid date ranges and adjusted end dates when necessary for better indexing accuracy.
---
.../composio_google_calendar_connector.py | 59 +++++++++++++--
.../routes/search_source_connectors_routes.py | 72 +++++++++++++++----
.../app/services/notification_service.py | 28 ++++++--
.../google_calendar_indexer.py | 49 ++++++++++++-
.../views/connector-edit-view.tsx | 14 ++--
.../hooks/use-connector-dialog.ts | 11 ++-
6 files changed, 198 insertions(+), 35 deletions(-)
diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
index ab8bde53c..3ac235848 100644
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@@ -18,7 +18,10 @@ from app.db import Document, DocumentType
from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
-from app.tasks.connector_indexers.base import calculate_date_range
+from app.tasks.connector_indexers.base import (
+ calculate_date_range,
+ check_duplicate_document_by_hash,
+)
from app.utils.document_converters import (
create_document_chunks,
generate_content_hash,
@@ -256,6 +259,7 @@ async def index_composio_google_calendar(
documents_indexed = 0
documents_skipped = 0
+ duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
for event in events:
try:
@@ -349,7 +353,25 @@ async def index_composio_google_calendar(
logger.info(
f"Committing batch: {documents_indexed} Google Calendar events processed so far"
)
- await session.commit()
+ await session.commit( )
+ continue
+
+ # Document doesn't exist by unique_identifier_hash
+ # Check if a document with the same content_hash exists (from standard connector)
+ with session.no_autoflush:
+ duplicate_by_content = await check_duplicate_document_by_hash(
+ session, content_hash
+ )
+
+ if duplicate_by_content:
+ # A document with the same content already exists (likely from standard connector)
+ logger.info(
+ f"Event {summary} already indexed by another connector "
+ f"(existing document ID: {duplicate_by_content.id}, "
+ f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content."
+ )
+ duplicate_content_count += 1
+ documents_skipped += 1
continue
# Create new document
@@ -429,10 +451,28 @@ async def index_composio_google_calendar(
logger.info(
f"Final commit: Total {documents_indexed} Google Calendar events processed"
)
- await session.commit()
- logger.info(
- "Successfully committed all Composio Google Calendar document changes to database"
- )
+ try:
+ await session.commit()
+ logger.info(
+ "Successfully committed all Composio Google Calendar document changes to database"
+ )
+ except Exception as e:
+ # Handle any remaining integrity errors gracefully (race conditions, etc.)
+ if "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower():
+ logger.warning(
+ f"Duplicate content_hash detected during final commit. "
+ f"This may occur if the same event was indexed by multiple connectors. "
+ f"Rolling back and continuing. Error: {e!s}"
+ )
+ await session.rollback()
+ # Don't fail the entire task - some documents may have been successfully indexed
+ else:
+ raise
+
+ # Build warning message if duplicates were found
+ warning_message = None
+ if duplicate_content_count > 0:
+ warning_message = f"{duplicate_content_count} skipped (duplicate)"
await task_logger.log_task_success(
log_entry,
@@ -440,10 +480,15 @@ async def index_composio_google_calendar(
{
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
+ "duplicate_content_count": duplicate_content_count,
},
)
- return documents_indexed, None
+ logger.info(
+ f"Composio Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
+ f"({duplicate_content_count} due to duplicate content from other connectors)"
+ )
+ return documents_indexed, warning_message
except Exception as e:
logger.error(
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 928327d9a..3b98d7d7c 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -22,6 +22,8 @@ import logging
from datetime import UTC, datetime, timedelta
from typing import Any
+import pytz
+from dateutil.parser import isoparse
from fastapi import APIRouter, Body, Depends, HTTPException, Query
from pydantic import BaseModel, Field, ValidationError
from sqlalchemy.exc import IntegrityError
@@ -681,6 +683,22 @@ async def index_connector_content(
]:
# Default to today if no end_date provided (users can manually select future dates)
indexing_to = today_str if end_date is None else end_date
+
+ # If start_date and end_date are the same, adjust end_date to be one day later
+ # to ensure valid date range (start_date must be strictly before end_date)
+ if indexing_from == indexing_to:
+ dt = isoparse(indexing_to)
+ if dt.tzinfo is None:
+ dt = dt.replace(tzinfo=pytz.UTC)
+ else:
+ dt = dt.astimezone(pytz.UTC)
+ # Add one day to end_date to make it strictly after start_date
+ dt_end = dt + timedelta(days=1)
+ indexing_to = dt_end.strftime("%Y-%m-%d")
+ logger.info(
+ f"Adjusted end_date from {end_date} to {indexing_to} "
+ f"to ensure valid date range (start_date must be strictly before end_date)"
+ )
else:
# For non-calendar connectors, cap at today
indexing_to = end_date if end_date else today_str
@@ -1231,20 +1249,48 @@ async def _run_indexing_with_notifications(
else:
# No new documents processed - check if this is an error or just no changes
if error_or_warning:
- # Actual failure
- logger.error(f"Indexing failed: {error_or_warning}")
- if notification:
- # Refresh notification to ensure it's not stale after indexing function commits
- await session.refresh(notification)
- await NotificationService.connector_indexing.notify_indexing_completed(
- session=session,
- notification=notification,
- indexed_count=0,
- error_message=error_or_warning,
+ # Check if this is a duplicate warning (success case) or an actual error
+ # Handle both normal and Composio calendar connectors
+ error_or_warning_lower = str(error_or_warning).lower() if error_or_warning else ""
+ is_duplicate_warning = "skipped (duplicate)" in error_or_warning_lower
+
+ if is_duplicate_warning:
+ # Duplicate warnings are success cases - sync worked, just found duplicates
+ logger.info(
+ f"Indexing completed successfully: {error_or_warning}"
)
- await (
- session.commit()
- ) # Commit to ensure Electric SQL syncs the notification update
+ # Still update timestamp so Electric SQL syncs and clears "Syncing" UI
+ if update_timestamp_func:
+ await update_timestamp_func(session, connector_id)
+ await session.commit() # Commit timestamp update
+ if notification:
+ # Refresh notification to ensure it's not stale after timestamp update commit
+ await session.refresh(notification)
+ await NotificationService.connector_indexing.notify_indexing_completed(
+ session=session,
+ notification=notification,
+ indexed_count=0,
+ error_message=error_or_warning, # Pass as warning, not error
+ is_warning=True, # Flag to indicate this is a warning, not an error
+ )
+ await (
+ session.commit()
+ ) # Commit to ensure Electric SQL syncs the notification update
+ else:
+ # Actual failure
+ logger.error(f"Indexing failed: {error_or_warning}")
+ if notification:
+ # Refresh notification to ensure it's not stale after indexing function commits
+ await session.refresh(notification)
+ await NotificationService.connector_indexing.notify_indexing_completed(
+ session=session,
+ notification=notification,
+ indexed_count=0,
+ error_message=error_or_warning,
+ )
+ await (
+ session.commit()
+ ) # Commit to ensure Electric SQL syncs the notification update
else:
# Success - just no new documents to index (all skipped/unchanged)
logger.info(
diff --git a/surfsense_backend/app/services/notification_service.py b/surfsense_backend/app/services/notification_service.py
index 836daeb9e..9fcf807e7 100644
--- a/surfsense_backend/app/services/notification_service.py
+++ b/surfsense_backend/app/services/notification_service.py
@@ -335,6 +335,7 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
notification: Notification,
indexed_count: int,
error_message: str | None = None,
+ is_warning: bool = False,
) -> Notification:
"""
Update notification when connector indexing completes.
@@ -343,7 +344,8 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
session: Database session
notification: Notification to update
indexed_count: Total number of items indexed
- error_message: Error message if indexing failed (optional)
+ error_message: Error message if indexing failed, or warning message (optional)
+ is_warning: If True, treat error_message as a warning (success case) rather than an error
Returns:
Updated notification
@@ -352,10 +354,26 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
"connector_name", "Connector"
)
+ # If there's an error message but items were indexed, treat it as a warning (partial success)
+ # If is_warning is True, treat it as success even with 0 items (e.g., duplicates found)
+ # Otherwise, treat it as a failure
if error_message:
- title = f"Failed: {connector_name}"
- message = f"Sync failed: {error_message}"
- status = "failed"
+ if indexed_count > 0:
+ # Partial success with warnings (e.g., duplicate content from other connectors)
+ title = f"Ready: {connector_name}"
+ item_text = "item" if indexed_count == 1 else "items"
+ message = f"Now searchable! {indexed_count} {item_text} synced. Note: {error_message}"
+ status = "completed"
+ elif is_warning:
+ # Warning case (e.g., duplicates found) - treat as success
+ title = f"Ready: {connector_name}"
+ message = f"Sync completed. {error_message}"
+ status = "completed"
+ else:
+ # Complete failure
+ title = f"Failed: {connector_name}"
+ message = f"Sync failed: {error_message}"
+ status = "failed"
else:
title = f"Ready: {connector_name}"
if indexed_count == 0:
@@ -367,7 +385,7 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
metadata_updates = {
"indexed_count": indexed_count,
- "sync_stage": "completed" if not error_message else "failed",
+ "sync_stage": "completed" if (not error_message or is_warning or indexed_count > 0) else "failed",
"error_message": error_message,
}
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index 7787560fa..5bc805549 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -23,6 +23,7 @@ from app.utils.document_converters import (
from .base import (
check_document_by_unique_identifier,
+ check_duplicate_document_by_hash,
get_connector_by_id,
get_current_timestamp,
logger,
@@ -289,6 +290,7 @@ async def index_google_calendar_events(
documents_indexed = 0
documents_skipped = 0
skipped_events = []
+ duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
for event in events:
try:
@@ -409,6 +411,27 @@ async def index_google_calendar_events(
)
continue
+ # Document doesn't exist by unique_identifier_hash
+ # Check if a document with the same content_hash exists (from another connector)
+ with session.no_autoflush:
+ duplicate_by_content = await check_duplicate_document_by_hash(
+ session, content_hash
+ )
+
+ if duplicate_by_content:
+ # A document with the same content already exists (likely from Composio connector)
+ logger.info(
+ f"Event {event_summary} already indexed by another connector "
+ f"(existing document ID: {duplicate_by_content.id}, "
+ f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content."
+ )
+ duplicate_content_count += 1
+ documents_skipped += 1
+ skipped_events.append(
+ f"{event_summary} (already indexed by another connector)"
+ )
+ continue
+
# Document doesn't exist - create new one
# Generate summary with metadata
user_llm = await get_user_long_context_llm(
@@ -501,7 +524,25 @@ async def index_google_calendar_events(
logger.info(
f"Final commit: Total {documents_indexed} Google Calendar events processed"
)
- await session.commit()
+ try:
+ await session.commit()
+ except Exception as e:
+ # Handle any remaining integrity errors gracefully (race conditions, etc.)
+ if "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower():
+ logger.warning(
+ f"Duplicate content_hash detected during final commit. "
+ f"This may occur if the same event was indexed by multiple connectors. "
+ f"Rolling back and continuing. Error: {e!s}"
+ )
+ await session.rollback()
+ # Don't fail the entire task - some documents may have been successfully indexed
+ else:
+ raise
+
+ # Build warning message if duplicates were found
+ warning_message = None
+ if duplicate_content_count > 0:
+ warning_message = f"{duplicate_content_count} skipped (duplicate)"
await task_logger.log_task_success(
log_entry,
@@ -510,14 +551,16 @@ async def index_google_calendar_events(
"events_processed": total_processed,
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
+ "duplicate_content_count": duplicate_content_count,
"skipped_events_count": len(skipped_events),
},
)
logger.info(
- f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped"
+ f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
+ f"({duplicate_content_count} due to duplicate content from other connectors)"
)
- return total_processed, None
+ return total_processed, warning_message
except SQLAlchemyError as db_error:
await session.rollback()
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index d12264fbd..8f58db542 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -100,10 +100,14 @@ export const ConnectorEditView: FC = ({
// Reset local quick indexing state when indexing completes or fails
useEffect(() => {
- if (!isIndexing) {
- setIsQuickIndexing(false);
+ if (!isIndexing && isQuickIndexing) {
+ // Small delay to ensure smooth transition
+ const timer = setTimeout(() => {
+ setIsQuickIndexing(false);
+ }, 100);
+ return () => clearTimeout(timer);
}
- }, [isIndexing]);
+ }, [isIndexing, isQuickIndexing]);
const handleDisconnectClick = () => {
setShowDisconnectConfirm(true);
@@ -119,11 +123,11 @@ export const ConnectorEditView: FC = ({
};
const handleQuickIndex = useCallback(() => {
- if (onQuickIndex) {
+ if (onQuickIndex && !isQuickIndexing && !isIndexing) {
setIsQuickIndexing(true);
onQuickIndex();
}
- }, [onQuickIndex]);
+ }, [onQuickIndex, isQuickIndexing, isIndexing]);
return (
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 1bcbd4263..9a7f15b0c 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -1409,7 +1409,12 @@ export const useConnectorDialog = () => {
startDate?: Date,
endDate?: Date
) => {
- if (!searchSpaceId) return;
+ if (!searchSpaceId) {
+ if (stopIndexing) {
+ stopIndexing(connectorId);
+ }
+ return;
+ }
// Track quick index clicked event
if (connectorType) {
@@ -1437,6 +1442,8 @@ export const useConnectorDialog = () => {
queryClient.invalidateQueries({
queryKey: cacheKeys.logs.summary(Number(searchSpaceId)),
});
+ // Note: Don't call stopIndexing here - let useIndexingConnectors hook
+ // detect when last_indexed_at changes via Electric SQL
} catch (error) {
console.error("Error indexing connector content:", error);
toast.error(error instanceof Error ? error.message : "Failed to start indexing");
@@ -1446,7 +1453,7 @@ export const useConnectorDialog = () => {
}
}
},
- [searchSpaceId, indexConnector]
+ [searchSpaceId, indexConnector, queryClient]
);
// Handle going back from edit view
From 6d14b49d3f4fb39994be6ba96bc93af3f1031831 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 01:20:51 +0530
Subject: [PATCH 17/21] feat: enhance indexing state management and inbox count
formatting
- Improved indexing state management by refining the logic for handling notifications, ensuring accurate updates for in-progress, completed, and failed states.
- Introduced a new utility function to format inbox counts, displaying numbers up to 999 and using "k+" for larger counts, enhancing user interface clarity.
- Updated sidebar components to utilize the new inbox count formatting, improving the overall user experience.
---
.../hooks/use-indexing-connectors.ts | 87 ++++++++++---------
.../layout/providers/LayoutDataProvider.tsx | 13 ++-
.../layout/ui/sidebar/InboxSidebar.tsx | 15 +++-
.../layout/ui/sidebar/NavSection.tsx | 4 +-
4 files changed, 75 insertions(+), 44 deletions(-)
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
index e82a8eb29..289da475d 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts
@@ -10,8 +10,9 @@ import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types";
*
* This provides a better UX than polling by:
* 1. Setting indexing state immediately when user triggers indexing (optimistic)
- * 2. Clearing indexing state when Electric SQL detects last_indexed_at changed
- * 3. Clearing indexing state when a failed notification is detected
+ * 2. Detecting in_progress notifications from Electric SQL to restore state after remounts
+ * 3. Clearing indexing state when notifications become completed or failed
+ * 4. Clearing indexing state when Electric SQL detects last_indexed_at changed
*
* The actual `last_indexed_at` value comes from Electric SQL/PGlite, not local state.
*/
@@ -28,65 +29,73 @@ export function useIndexingConnectors(
// Detect when last_indexed_at changes (indexing completed) via Electric SQL
useEffect(() => {
const previousValues = previousLastIndexedAtRef.current;
- const newIndexingIds = new Set(indexingConnectorIds);
- let hasChanges = false;
for (const connector of connectors) {
const previousValue = previousValues.get(connector.id);
const currentValue = connector.last_indexed_at;
- // If last_indexed_at changed and connector was in indexing state, clear it
+ // If last_indexed_at changed, remove the connector from the indexing state
if (
previousValue !== undefined && // We've seen this connector before
- previousValue !== currentValue && // Value changed
- indexingConnectorIds.has(connector.id) // It was marked as indexing
+ previousValue !== currentValue // Value changed
) {
- newIndexingIds.delete(connector.id);
- hasChanges = true;
+ // Use functional update to access current state
+ setIndexingConnectorIds((prev) => {
+ if (prev.has(connector.id)) {
+ const next = new Set(prev);
+ next.delete(connector.id);
+ return next;
+ }
+ return prev;
+ });
}
// Update previous value tracking
previousValues.set(connector.id, currentValue);
}
+ }, [connectors]);
- if (hasChanges) {
- setIndexingConnectorIds(newIndexingIds);
- }
- }, [connectors, indexingConnectorIds]);
-
- // Detect failed notifications and stop indexing state
+ // Detect notification status changes and update indexing state accordingly
+ // This restores spinner state after component remounts and handles all status transitions
useEffect(() => {
if (!inboxItems || inboxItems.length === 0) return;
- const newIndexingIds = new Set(indexingConnectorIds);
- let hasChanges = false;
+ setIndexingConnectorIds((prev) => {
+ const newIndexingIds = new Set(prev);
+ let hasChanges = false;
- for (const item of inboxItems) {
- // Only check connector_indexing notifications
- if (item.type !== "connector_indexing") continue;
+ for (const item of inboxItems) {
+ // Only check connector_indexing notifications
+ if (item.type !== "connector_indexing") continue;
- // Check if this notification indicates a failure
- const metadata = isConnectorIndexingMetadata(item.metadata)
- ? item.metadata
- : null;
- if (!metadata) continue;
+ const metadata = isConnectorIndexingMetadata(item.metadata)
+ ? item.metadata
+ : null;
+ if (!metadata) continue;
- // Check if status is "failed" or if there's an error_message
- const isFailed =
- metadata.status === "failed" ||
- (metadata.error_message && metadata.error_message.trim().length > 0);
-
- // If failed and connector is in indexing state, clear it
- if (isFailed && indexingConnectorIds.has(metadata.connector_id)) {
- newIndexingIds.delete(metadata.connector_id);
- hasChanges = true;
+ // If status is "in_progress", add connector to indexing set
+ if (metadata.status === "in_progress") {
+ if (!newIndexingIds.has(metadata.connector_id)) {
+ newIndexingIds.add(metadata.connector_id);
+ hasChanges = true;
+ }
+ }
+ // If status is "completed" or "failed", remove connector from indexing set
+ else if (
+ metadata.status === "completed" ||
+ metadata.status === "failed" ||
+ (metadata.error_message && metadata.error_message.trim().length > 0)
+ ) {
+ if (newIndexingIds.has(metadata.connector_id)) {
+ newIndexingIds.delete(metadata.connector_id);
+ hasChanges = true;
+ }
+ }
}
- }
- if (hasChanges) {
- setIndexingConnectorIds(newIndexingIds);
- }
- }, [inboxItems, indexingConnectorIds]);
+ return hasChanges ? newIndexingIds : prev;
+ });
+ }, [inboxItems]);
// Add a connector to the indexing set (called when indexing starts)
const startIndexing = useCallback((connectorId: number) => {
diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
index 52dc7196a..9e3f55c97 100644
--- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
+++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
@@ -38,6 +38,17 @@ interface LayoutDataProviderProps {
breadcrumb?: React.ReactNode;
}
+/**
+ * Format a count for display: counts up to 999 are returned unchanged as a string; larger counts are floored to whole thousands and shown as "1k+", "2k+", etc. (e.g. 1999 -> "1k+").
+ */
+function formatInboxCount(count: number): string {
+ if (count <= 999) {
+ return count.toString();
+ }
+ const thousands = Math.floor(count / 1000);
+ return `${thousands}k+`;
+}
+
export function LayoutDataProvider({
searchSpaceId,
children,
@@ -172,7 +183,7 @@ export function LayoutDataProvider({
url: "#inbox", // Special URL to indicate this is handled differently
icon: Inbox,
isActive: isInboxSidebarOpen,
- badge: unreadCount > 0 ? (unreadCount > 99 ? "99+" : unreadCount) : undefined,
+ badge: unreadCount > 0 ? formatInboxCount(unreadCount) : undefined,
},
],
[searchSpaceId, pathname, isInboxSidebarOpen, unreadCount]
diff --git a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
index bb06d6a56..e80c6e62d 100644
--- a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx
@@ -70,6 +70,17 @@ function getInitials(name: string | null | undefined, email: string | null | und
return "U";
}
+/**
+ * Format a count for display: counts up to 999 are returned unchanged as a string; larger counts are floored to whole thousands and shown as "1k+", "2k+", etc. (e.g. 1999 -> "1k+").
+ */
+function formatInboxCount(count: number): string {
+ if (count <= 999) {
+ return count.toString();
+ }
+ const thousands = Math.floor(count / 1000);
+ return `${thousands}k+`;
+}
+
/**
* Get display name for connector type
*/
@@ -732,7 +743,7 @@ export function InboxSidebar({
{t("mentions") || "Mentions"}
- {unreadMentionsCount}
+ {formatInboxCount(unreadMentionsCount)}
@@ -744,7 +755,7 @@ export function InboxSidebar({
{t("status") || "Status"}
- {unreadStatusCount}
+ {formatInboxCount(unreadStatusCount)}
diff --git a/surfsense_web/components/layout/ui/sidebar/NavSection.tsx b/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
index d2d926de8..742a27bbc 100644
--- a/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/NavSection.tsx
@@ -39,7 +39,7 @@ export function NavSection({ items, onItemClick, isCollapsed = false }: NavSecti
>
{item.badge && (
-
+
{item.badge}
)}
@@ -70,7 +70,7 @@ export function NavSection({ items, onItemClick, isCollapsed = false }: NavSecti
{item.title}
{item.badge && (
-
+
{item.badge}
)}
From f4b1192a063e71437bb24340342fcee2a69f6a1f Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 03:51:57 +0530
Subject: [PATCH 18/21] feat: refine indexing success case handling and
notification messaging
- Enhanced the logic for determining success cases during indexing by distinguishing between duplicate warnings and empty results.
- Updated notification messages to provide clearer feedback for empty results, improving user understanding of indexing outcomes.
- Ensured that notifications reflect accurate statuses, maintaining consistency in user feedback during the indexing process.
---
.../app/routes/search_source_connectors_routes.py | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 3b98d7d7c..487a689dc 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -1249,13 +1249,15 @@ async def _run_indexing_with_notifications(
else:
# No new documents processed - check if this is an error or just no changes
if error_or_warning:
- # Check if this is a duplicate warning (success case) or an actual error
+ # Check if this is a duplicate warning or empty result (success cases) or an actual error
# Handle both normal and Composio calendar connectors
error_or_warning_lower = str(error_or_warning).lower() if error_or_warning else ""
is_duplicate_warning = "skipped (duplicate)" in error_or_warning_lower
+ # "No X found" messages are success cases - sync worked, just found nothing in date range
+ is_empty_result = ("no " in error_or_warning_lower and "found" in error_or_warning_lower)
- if is_duplicate_warning:
- # Duplicate warnings are success cases - sync worked, just found duplicates
+ if is_duplicate_warning or is_empty_result:
+ # These are success cases - sync worked, just found nothing new
logger.info(
f"Indexing completed successfully: {error_or_warning}"
)
@@ -1266,11 +1268,13 @@ async def _run_indexing_with_notifications(
if notification:
# Refresh notification to ensure it's not stale after timestamp update commit
await session.refresh(notification)
+ # For empty results, use a cleaner message
+ notification_message = "No new items found in date range" if is_empty_result else error_or_warning
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=0,
- error_message=error_or_warning, # Pass as warning, not error
+ error_message=notification_message, # Pass as warning, not error
is_warning=True, # Flag to indicate this is a warning, not an error
)
await (
From 5cf6fb15ed9c0f875c584ac4af216d279ae9eb36 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 03:59:17 +0530
Subject: [PATCH 19/21] fix: improve error logging for indexing tasks across
multiple connectors
- Updated error handling in the indexing functions for BookStack, Confluence, Google Calendar, Jira, Linear, and Luma connectors to log specific error messages when failures occur.
- Enhanced logging for cases where no pages or events are found, providing clearer informational messages instead of treating them as critical errors.
- Ensured consistent error reporting across all connector indexers, improving debugging and user feedback during indexing operations.
---
.../app/tasks/connector_indexers/bookstack_indexer.py | 4 ++--
.../app/tasks/connector_indexers/confluence_indexer.py | 4 ++--
.../app/tasks/connector_indexers/google_calendar_indexer.py | 4 ++--
.../app/tasks/connector_indexers/jira_indexer.py | 4 ++--
.../app/tasks/connector_indexers/linear_indexer.py | 4 ++--
.../app/tasks/connector_indexers/luma_indexer.py | 4 ++--
6 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
index 2793f78db..a1067255d 100644
--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@@ -136,10 +136,9 @@ async def index_bookstack_pages(
)
if error:
- logger.error(f"Failed to get BookStack pages: {error}")
-
# Don't treat "No pages found" as an error that should stop indexing
if "No pages found" in error:
+ logger.info(f"No BookStack pages found: {error}")
logger.info(
"No pages found is not a critical error, continuing with update"
)
@@ -159,6 +158,7 @@ async def index_bookstack_pages(
)
return 0, None
else:
+ logger.error(f"Failed to get BookStack pages: {error}")
await task_logger.log_task_failure(
log_entry,
f"Failed to get BookStack pages: {error}",
diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
index 7289b0ccd..ddbefafb9 100644
--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@@ -120,10 +120,9 @@ async def index_confluence_pages(
)
if error:
- logger.error(f"Failed to get Confluence pages: {error}")
-
# Don't treat "No pages found" as an error that should stop indexing
if "No pages found" in error:
+ logger.info(f"No Confluence pages found: {error}")
logger.info(
"No pages found is not a critical error, continuing with update"
)
@@ -147,6 +146,7 @@ async def index_confluence_pages(
await confluence_client.close()
return 0, None
else:
+ logger.error(f"Failed to get Confluence pages: {error}")
await task_logger.log_task_failure(
log_entry,
f"Failed to get Confluence pages: {error}",
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index 5bc805549..ef1f821d2 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -243,10 +243,9 @@ async def index_google_calendar_events(
)
if error:
- logger.error(f"Failed to get Google Calendar events: {error}")
-
# Don't treat "No events found" as an error that should stop indexing
if "No events found" in error:
+ logger.info(f"No Google Calendar events found: {error}")
logger.info(
"No events found is not a critical error, continuing with update"
)
@@ -266,6 +265,7 @@ async def index_google_calendar_events(
)
return 0, None
else:
+ logger.error(f"Failed to get Google Calendar events: {error}")
# Check if this is an authentication error that requires re-authentication
error_message = error
error_type = "APIError"
diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
index fdbeb93b0..4851a6466 100644
--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@@ -126,10 +126,9 @@ async def index_jira_issues(
)
if error:
- logger.error(f"Failed to get Jira issues: {error}")
-
# Don't treat "No issues found" as an error that should stop indexing
if "No issues found" in error:
+ logger.info(f"No Jira issues found: {error}")
logger.info(
"No issues found is not a critical error, continuing with update"
)
@@ -149,6 +148,7 @@ async def index_jira_issues(
)
return 0, None
else:
+ logger.error(f"Failed to get Jira issues: {error}")
await task_logger.log_task_failure(
log_entry,
f"Failed to get Jira issues: {error}",
diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
index f1bfd42e8..7d8e0c30e 100644
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@@ -145,10 +145,9 @@ async def index_linear_issues(
)
if error:
- logger.error(f"Failed to get Linear issues: {error}")
-
# Don't treat "No issues found" as an error that should stop indexing
if "No issues found" in error:
+ logger.info(f"No Linear issues found: {error}")
logger.info(
"No issues found is not a critical error, continuing with update"
)
@@ -162,6 +161,7 @@ async def index_linear_issues(
)
return 0, None
else:
+ logger.error(f"Failed to get Linear issues: {error}")
return 0, f"Failed to get Linear issues: {error}"
logger.info(f"Retrieved {len(issues)} issues from Linear API")
diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
index 0d7a979be..ead259a44 100644
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@@ -179,10 +179,9 @@ async def index_luma_events(
)
if error:
- logger.error(f"Failed to get Luma events: {error}")
-
# Don't treat "No events found" as an error that should stop indexing
if "No events found" in error or "no events" in error.lower():
+ logger.info(f"No Luma events found: {error}")
logger.info(
"No events found is not a critical error, continuing with update"
)
@@ -202,6 +201,7 @@ async def index_luma_events(
)
return 0, None
else:
+ logger.error(f"Failed to get Luma events: {error}")
await task_logger.log_task_failure(
log_entry,
f"Failed to get Luma events: {error}",
From 97d7207bd4e76a5c76b1d6ed88a0784ea76f0445 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 04:33:10 +0530
Subject: [PATCH 20/21] fix: update Google Drive indexer to use SQLAlchemy
casting for metadata queries
- Modified the Google Drive indexer to use SQLAlchemy's cast function for querying document metadata, ensuring proper type handling for file IDs.
- Improved the consistency of metadata queries across the indexing functions, enhancing reliability in document retrieval and processing.
---
.../app/tasks/connector_indexers/google_drive_indexer.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
index 48282a1af..af180c36b 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
@@ -578,7 +578,7 @@ async def _check_rename_only_update(
- (True, message): Only filename changed, document was updated
- (False, None): Content changed or new file, needs full processing
"""
- from sqlalchemy import select
+ from sqlalchemy import cast, select, String
from sqlalchemy.orm.attributes import flag_modified
from app.db import Document
@@ -603,7 +603,7 @@ async def _check_rename_only_update(
select(Document).where(
Document.search_space_id == search_space_id,
Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
- Document.document_metadata["google_drive_file_id"].astext == file_id,
+ cast(Document.document_metadata["google_drive_file_id"], String) == file_id,
)
)
existing_document = result.scalar_one_or_none()
@@ -755,7 +755,7 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:
Handles both new (file_id-based) and legacy (filename-based) hash schemes.
"""
- from sqlalchemy import select
+ from sqlalchemy import cast, select, String
from app.db import Document
@@ -774,7 +774,7 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:
select(Document).where(
Document.search_space_id == search_space_id,
Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
- Document.document_metadata["google_drive_file_id"].astext == file_id,
+ cast(Document.document_metadata["google_drive_file_id"], String) == file_id,
)
)
existing_document = result.scalar_one_or_none()
From a5103da3d74fded873e311108b601d8b36740fce Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 24 Jan 2026 04:36:34 +0530
Subject: [PATCH 21/21] chore: ran linting
---
.../connectors/composio_gmail_connector.py | 1 -
.../composio_google_calendar_connector.py | 14 +-
.../composio_google_drive_connector.py | 83 +++++-----
.../connectors/google_calendar_connector.py | 17 ++-
.../app/connectors/google_gmail_connector.py | 17 ++-
.../app/routes/composio_routes.py | 4 +-
.../routes/search_source_connectors_routes.py | 31 ++--
.../app/services/composio_service.py | 142 ++++++++++--------
.../app/services/notification_service.py | 4 +-
.../google_calendar_indexer.py | 21 ++-
.../google_drive_indexer.py | 10 +-
.../google_gmail_indexer.py | 13 +-
.../assistant-ui/connector-popup.tsx | 5 +-
.../components/composio-calendar-config.tsx | 1 -
.../components/composio-drive-config.tsx | 24 +--
.../components/composio-gmail-config.tsx | 1 -
.../components/google-drive-config.tsx | 10 +-
.../views/connector-edit-view.tsx | 3 +-
.../views/indexing-configuration-view.tsx | 17 ++-
.../hooks/use-connector-dialog.ts | 18 +--
.../hooks/use-indexing-connectors.ts | 4 +-
21 files changed, 259 insertions(+), 181 deletions(-)
diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py
index 5a9645a66..953e2e8fc 100644
--- a/surfsense_backend/app/connectors/composio_gmail_connector.py
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@@ -611,4 +611,3 @@ async def index_composio_gmail(
except Exception as e:
logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
return 0, f"Failed to index Gmail via Composio: {e!s}"
-
diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
index 3ac235848..ec5b22b7f 100644
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@@ -259,7 +259,9 @@ async def index_composio_google_calendar(
documents_indexed = 0
documents_skipped = 0
- duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
+ duplicate_content_count = (
+ 0 # Track events skipped due to duplicate content_hash
+ )
for event in events:
try:
@@ -353,7 +355,7 @@ async def index_composio_google_calendar(
logger.info(
f"Committing batch: {documents_indexed} Google Calendar events processed so far"
)
- await session.commit( )
+ await session.commit()
continue
# Document doesn't exist by unique_identifier_hash
@@ -362,7 +364,7 @@ async def index_composio_google_calendar(
duplicate_by_content = await check_duplicate_document_by_hash(
session, content_hash
)
-
+
if duplicate_by_content:
# A document with the same content already exists (likely from standard connector)
logger.info(
@@ -458,7 +460,10 @@ async def index_composio_google_calendar(
)
except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.)
- if "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower():
+ if (
+ "duplicate key value violates unique constraint" in str(e).lower()
+ or "uniqueviolationerror" in str(e).lower()
+ ):
logger.warning(
f"Duplicate content_hash detected during final commit. "
f"This may occur if the same event was indexed by multiple connectors. "
@@ -495,4 +500,3 @@ async def index_composio_google_calendar(
f"Failed to index Google Calendar via Composio: {e!s}", exc_info=True
)
return 0, f"Failed to index Google Calendar via Composio: {e!s}"
-
diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py
index e19436611..e3b988676 100644
--- a/surfsense_backend/app/connectors/composio_google_drive_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py
@@ -453,8 +453,8 @@ async def check_document_by_unique_identifier(
session: AsyncSession, unique_identifier_hash: str
) -> Document | None:
"""Check if a document with the given unique identifier hash already exists."""
- from sqlalchemy.orm import selectinload
from sqlalchemy.future import select
+ from sqlalchemy.orm import selectinload
existing_doc_result = await session.execute(
select(Document)
@@ -517,14 +517,20 @@ async def index_composio_google_drive(
# Route to delta sync or full scan
if use_delta_sync:
- logger.info(f"Using delta sync for Composio Google Drive connector {connector_id}")
+ logger.info(
+ f"Using delta sync for Composio Google Drive connector {connector_id}"
+ )
await task_logger.log_task_progress(
log_entry,
f"Starting delta sync for Google Drive via Composio (connector {connector_id})",
{"stage": "delta_sync", "token": stored_page_token[:20] + "..."},
)
- documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_delta_sync(
+ (
+ documents_indexed,
+ documents_skipped,
+ processing_errors,
+ ) = await _index_composio_drive_delta_sync(
session=session,
composio_connector=composio_connector,
connector_id=connector_id,
@@ -536,7 +542,9 @@ async def index_composio_google_drive(
log_entry=log_entry,
)
else:
- logger.info(f"Using full scan for Composio Google Drive connector {connector_id} (first sync or no token)")
+ logger.info(
+ f"Using full scan for Composio Google Drive connector {connector_id} (first sync or no token)"
+ )
await task_logger.log_task_progress(
log_entry,
f"Fetching Google Drive files via Composio for connector {connector_id}",
@@ -547,7 +555,11 @@ async def index_composio_google_drive(
},
)
- documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_full_scan(
+ (
+ documents_indexed,
+ documents_skipped,
+ processing_errors,
+ ) = await _index_composio_drive_full_scan(
session=session,
composio_connector=composio_connector,
connector_id=connector_id,
@@ -580,9 +592,13 @@ async def index_composio_google_drive(
await update_connector_last_indexed(session, connector, update_last_indexed)
# Final commit
- logger.info(f"Final commit: Total {documents_indexed} Google Drive files processed")
+ logger.info(
+ f"Final commit: Total {documents_indexed} Google Drive files processed"
+ )
await session.commit()
- logger.info("Successfully committed all Composio Google Drive document changes to database")
+ logger.info(
+ "Successfully committed all Composio Google Drive document changes to database"
+ )
# Handle processing errors
error_message = None
@@ -731,7 +747,9 @@ async def _index_composio_drive_delta_sync(
processing_errors.append(error_msg)
documents_skipped += 1
- logger.info(f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped")
+ logger.info(
+ f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped"
+ )
return documents_indexed, documents_skipped, processing_errors
@@ -858,20 +876,18 @@ async def _index_composio_drive_full_scan(
logger.info("No Google Drive files found")
return 0, 0, []
- logger.info(f"Found {len(all_files)} Google Drive files to index via Composio (full scan)")
+ logger.info(
+ f"Found {len(all_files)} Google Drive files to index via Composio (full scan)"
+ )
for file_info in all_files:
try:
# Handle both standard Google API and potential Composio variations
file_id = file_info.get("id", "") or file_info.get("fileId", "")
file_name = (
- file_info.get("name", "")
- or file_info.get("fileName", "")
- or "Untitled"
- )
- mime_type = file_info.get("mimeType", "") or file_info.get(
- "mime_type", ""
+ file_info.get("name", "") or file_info.get("fileName", "") or "Untitled"
)
+ mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
if not file_id:
documents_skipped += 1
@@ -901,7 +917,9 @@ async def _index_composio_drive_full_scan(
# Batch commit every 10 documents
if documents_indexed > 0 and documents_indexed % 10 == 0:
- logger.info(f"Committing batch: {documents_indexed} Google Drive files processed so far")
+ logger.info(
+ f"Committing batch: {documents_indexed} Google Drive files processed so far"
+ )
await session.commit()
except Exception as e:
@@ -910,7 +928,9 @@ async def _index_composio_drive_full_scan(
processing_errors.append(error_msg)
documents_skipped += 1
- logger.info(f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped")
+ logger.info(
+ f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped"
+ )
return documents_indexed, documents_skipped, processing_errors
@@ -948,9 +968,7 @@ async def _process_single_drive_file(
content, content_error = await composio_connector.get_drive_file_content(file_id)
if content_error or not content:
- logger.warning(
- f"Could not get content for file {file_name}: {content_error}"
- )
+ logger.warning(f"Could not get content for file {file_name}: {content_error}")
# Use metadata as content fallback
markdown_content = f"# {file_name}\n\n"
markdown_content += f"**File ID:** {file_id}\n"
@@ -985,9 +1003,7 @@ async def _process_single_drive_file(
return 0, 1, processing_errors # Skipped
# Update existing document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
+ user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
if user_llm:
document_metadata = {
@@ -1003,12 +1019,8 @@ async def _process_single_drive_file(
markdown_content, user_llm, document_metadata
)
else:
- summary_content = (
- f"Google Drive File: {file_name}\n\nType: {mime_type}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
+ summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
+ summary_embedding = config.embedding_model_instance.embed(summary_content)
chunks = await create_document_chunks(markdown_content)
@@ -1030,9 +1042,7 @@ async def _process_single_drive_file(
return 1, 0, processing_errors # Indexed
# Create new document
- user_llm = await get_user_long_context_llm(
- session, user_id, search_space_id
- )
+ user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
if user_llm:
document_metadata = {
@@ -1048,12 +1058,8 @@ async def _process_single_drive_file(
markdown_content, user_llm, document_metadata
)
else:
- summary_content = (
- f"Google Drive File: {file_name}\n\nType: {mime_type}"
- )
- summary_embedding = config.embedding_model_instance.embed(
- summary_content
- )
+ summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
+ summary_embedding = config.embedding_model_instance.embed(summary_content)
chunks = await create_document_chunks(markdown_content)
@@ -1159,4 +1165,3 @@ async def _fetch_folder_files_recursively(
except Exception as e:
logger.error(f"Error in recursive folder fetch: {e!s}")
return all_files
-
diff --git a/surfsense_backend/app/connectors/google_calendar_connector.py b/surfsense_backend/app/connectors/google_calendar_connector.py
index ac60b02a8..d8160cf25 100644
--- a/surfsense_backend/app/connectors/google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/google_calendar_connector.py
@@ -144,7 +144,10 @@ class GoogleCalendarConnector:
except Exception as e:
error_str = str(e)
# Check if this is an invalid_grant error (token expired/revoked)
- if "invalid_grant" in error_str.lower() or "token has been expired or revoked" in error_str.lower():
+ if (
+ "invalid_grant" in error_str.lower()
+ or "token has been expired or revoked" in error_str.lower()
+ ):
raise Exception(
"Google Calendar authentication failed. Please re-authenticate."
) from e
@@ -173,7 +176,11 @@ class GoogleCalendarConnector:
except Exception as e:
error_str = str(e)
# If the error already contains a user-friendly re-authentication message, preserve it
- if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ if (
+ "re-authenticate" in error_str.lower()
+ or "expired or been revoked" in error_str.lower()
+ or "authentication failed" in error_str.lower()
+ ):
raise Exception(error_str) from e
raise Exception(f"Failed to create Google Calendar service: {e!s}") from e
@@ -283,7 +290,11 @@ class GoogleCalendarConnector:
except Exception as e:
error_str = str(e)
# If the error already contains a user-friendly re-authentication message, preserve it
- if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ if (
+ "re-authenticate" in error_str.lower()
+ or "expired or been revoked" in error_str.lower()
+ or "authentication failed" in error_str.lower()
+ ):
return [], error_str
return [], f"Error fetching events: {e!s}"
diff --git a/surfsense_backend/app/connectors/google_gmail_connector.py b/surfsense_backend/app/connectors/google_gmail_connector.py
index c86a96413..7c7262bff 100644
--- a/surfsense_backend/app/connectors/google_gmail_connector.py
+++ b/surfsense_backend/app/connectors/google_gmail_connector.py
@@ -143,7 +143,10 @@ class GoogleGmailConnector:
except Exception as e:
error_str = str(e)
# Check if this is an invalid_grant error (token expired/revoked)
- if "invalid_grant" in error_str.lower() or "token has been expired or revoked" in error_str.lower():
+ if (
+ "invalid_grant" in error_str.lower()
+ or "token has been expired or revoked" in error_str.lower()
+ ):
raise Exception(
"Gmail authentication failed. Please re-authenticate."
) from e
@@ -172,7 +175,11 @@ class GoogleGmailConnector:
except Exception as e:
error_str = str(e)
# If the error already contains a user-friendly re-authentication message, preserve it
- if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ if (
+ "re-authenticate" in error_str.lower()
+ or "expired or been revoked" in error_str.lower()
+ or "authentication failed" in error_str.lower()
+ ):
raise Exception(error_str) from e
raise Exception(f"Failed to create Gmail service: {e!s}") from e
@@ -237,7 +244,11 @@ class GoogleGmailConnector:
except Exception as e:
error_str = str(e)
# If the error already contains a user-friendly re-authentication message, preserve it
- if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
+ if (
+ "re-authenticate" in error_str.lower()
+ or "expired or been revoked" in error_str.lower()
+ or "authentication failed" in error_str.lower()
+ ):
return [], error_str
return [], f"Error fetching messages list: {e!s}"
diff --git a/surfsense_backend/app/routes/composio_routes.py b/surfsense_backend/app/routes/composio_routes.py
index 14ef9efcf..a28361132 100644
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@@ -350,10 +350,10 @@ async def composio_callback(
count = await count_connectors_of_type(
session, connector_type, space_id, user_id
)
-
+
# Generate base name (e.g., "Gmail", "Google Drive")
base_name = get_base_name_for_type(connector_type)
-
+
# Format: "Gmail (Composio) 1", "Gmail (Composio) 2", etc.
if count == 0:
connector_name = f"{base_name} (Composio) 1"
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 487a689dc..191c6f954 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -662,16 +662,16 @@ async def index_connector_content(
# Use UTC for "today" to match how last_indexed_at is stored
today_utc = datetime.now(UTC).replace(tzinfo=None).date()
last_indexed_date = last_indexed_naive.date()
-
+
if last_indexed_date == today_utc:
# If last indexed today, go back 1 day to ensure we don't miss anything
indexing_from = (today_utc - timedelta(days=1)).strftime("%Y-%m-%d")
else:
indexing_from = last_indexed_naive.strftime("%Y-%m-%d")
else:
- indexing_from = (datetime.now(UTC).replace(tzinfo=None) - timedelta(days=365)).strftime(
- "%Y-%m-%d"
- )
+ indexing_from = (
+ datetime.now(UTC).replace(tzinfo=None) - timedelta(days=365)
+ ).strftime("%Y-%m-%d")
else:
indexing_from = start_date
@@ -683,7 +683,7 @@ async def index_connector_content(
]:
# Default to today if no end_date provided (users can manually select future dates)
indexing_to = today_str if end_date is None else end_date
-
+
# If start_date and end_date are the same, adjust end_date to be one day later
# to ensure valid date range (start_date must be strictly before end_date)
if indexing_from == indexing_to:
@@ -1251,16 +1251,19 @@ async def _run_indexing_with_notifications(
if error_or_warning:
# Check if this is a duplicate warning or empty result (success cases) or an actual error
# Handle both normal and Composio calendar connectors
- error_or_warning_lower = str(error_or_warning).lower() if error_or_warning else ""
+ error_or_warning_lower = (
+ str(error_or_warning).lower() if error_or_warning else ""
+ )
is_duplicate_warning = "skipped (duplicate)" in error_or_warning_lower
# "No X found" messages are success cases - sync worked, just found nothing in date range
- is_empty_result = ("no " in error_or_warning_lower and "found" in error_or_warning_lower)
-
+ is_empty_result = (
+ "no " in error_or_warning_lower
+ and "found" in error_or_warning_lower
+ )
+
if is_duplicate_warning or is_empty_result:
# These are success cases - sync worked, just found nothing new
- logger.info(
- f"Indexing completed successfully: {error_or_warning}"
- )
+ logger.info(f"Indexing completed successfully: {error_or_warning}")
# Still update timestamp so ElectricSQL syncs and clears "Syncing" UI
if update_timestamp_func:
await update_timestamp_func(session, connector_id)
@@ -1269,7 +1272,11 @@ async def _run_indexing_with_notifications(
# Refresh notification to ensure it's not stale after timestamp update commit
await session.refresh(notification)
# For empty results, use a cleaner message
- notification_message = "No new items found in date range" if is_empty_result else error_or_warning
+ notification_message = (
+ "No new items found in date range"
+ if is_empty_result
+ else error_or_warning
+ )
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
diff --git a/surfsense_backend/app/services/composio_service.py b/surfsense_backend/app/services/composio_service.py
index 3ea2d1bf2..ad7841a8b 100644
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@@ -81,7 +81,9 @@ class ComposioService:
# Default download directory for files from Composio
DEFAULT_DOWNLOAD_DIR = "/tmp/composio_downloads"
- def __init__(self, api_key: str | None = None, file_download_dir: str | None = None):
+ def __init__(
+ self, api_key: str | None = None, file_download_dir: str | None = None
+ ):
"""
Initialize the Composio service.
@@ -90,18 +92,20 @@ class ComposioService:
file_download_dir: Directory for downloaded files. Defaults to /tmp/composio_downloads.
"""
import os
-
+
self.api_key = api_key or config.COMPOSIO_API_KEY
if not self.api_key:
raise ValueError("COMPOSIO_API_KEY is required but not configured")
-
+
# Set up download directory
self.file_download_dir = file_download_dir or self.DEFAULT_DOWNLOAD_DIR
os.makedirs(self.file_download_dir, exist_ok=True)
-
+
# Initialize Composio client with download directory
# Per docs: file_download_dir configures where files are downloaded
- self.client = Composio(api_key=self.api_key, file_download_dir=self.file_download_dir)
+ self.client = Composio(
+ api_key=self.api_key, file_download_dir=self.file_download_dir
+ )
@staticmethod
def is_enabled() -> bool:
@@ -512,7 +516,7 @@ class ComposioService:
Tuple of (file content bytes, error message).
"""
from pathlib import Path
-
+
try:
result = await self.execute_tool(
connected_account_id=connected_account_id,
@@ -532,35 +536,37 @@ class ComposioService:
# Response structure: {data: {...}, error: ..., successful: ...}
# The actual file info is nested inside data["data"]
file_path = None
-
+
if isinstance(data, dict):
# Handle nested response structure: data contains {data, error, successful}
# The actual file info is in data["data"]
inner_data = data
if "data" in data and isinstance(data["data"], dict):
inner_data = data["data"]
- logger.debug(f"Found nested data structure. Inner keys: {list(inner_data.keys())}")
+ logger.debug(
+ f"Found nested data structure. Inner keys: {list(inner_data.keys())}"
+ )
elif "successful" in data and "data" in data:
# Standard Composio response wrapper
inner_data = data["data"] if data["data"] else data
-
+
# Try documented fields: file_path, downloaded_file_content, path, uri
file_path = (
- inner_data.get("file_path") or
- inner_data.get("downloaded_file_content") or
- inner_data.get("path") or
- inner_data.get("uri")
+ inner_data.get("file_path")
+ or inner_data.get("downloaded_file_content")
+ or inner_data.get("path")
+ or inner_data.get("uri")
)
-
+
# Handle nested dict case where downloaded_file_content contains the path
if isinstance(file_path, dict):
file_path = (
- file_path.get("file_path") or
- file_path.get("downloaded_file_content") or
- file_path.get("path") or
- file_path.get("uri")
+ file_path.get("file_path")
+ or file_path.get("downloaded_file_content")
+ or file_path.get("path")
+ or file_path.get("uri")
)
-
+
# If still no path, check if inner_data itself has the nested structure
if not file_path and isinstance(inner_data, dict):
for key in ["downloaded_file_content", "file_path", "path", "uri"]:
@@ -572,15 +578,17 @@ class ComposioService:
elif isinstance(val, dict):
# One more level of nesting
file_path = (
- val.get("file_path") or
- val.get("downloaded_file_content") or
- val.get("path") or
- val.get("uri")
+ val.get("file_path")
+ or val.get("downloaded_file_content")
+ or val.get("path")
+ or val.get("uri")
)
if file_path:
break
-
- logger.debug(f"Composio response keys: {list(data.keys())}, inner keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}, extracted path: {file_path}")
+
+ logger.debug(
+ f"Composio response keys: {list(data.keys())}, inner keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}, extracted path: {file_path}"
+ )
elif isinstance(data, str):
# Direct string response (could be path or content)
file_path = data
@@ -591,24 +599,31 @@ class ComposioService:
# Read file from the path
if file_path and isinstance(file_path, str):
path_obj = Path(file_path)
-
+
# Check if it's a valid file path (absolute or in .composio directory)
- if path_obj.is_absolute() or '.composio' in str(path_obj):
+ if path_obj.is_absolute() or ".composio" in str(path_obj):
try:
if path_obj.exists():
content = path_obj.read_bytes()
- logger.info(f"Successfully read {len(content)} bytes from Composio file: {file_path}")
+ logger.info(
+ f"Successfully read {len(content)} bytes from Composio file: {file_path}"
+ )
return content, None
else:
- logger.warning(f"File path from Composio does not exist: {file_path}")
+ logger.warning(
+ f"File path from Composio does not exist: {file_path}"
+ )
return None, f"File not found at path: {file_path}"
except Exception as e:
- logger.error(f"Failed to read file from Composio path {file_path}: {e!s}")
+ logger.error(
+ f"Failed to read file from Composio path {file_path}: {e!s}"
+ )
return None, f"Failed to read file: {e!s}"
else:
# Not a file path - might be base64 encoded content
try:
import base64
+
content = base64.b64decode(file_path)
return content, None
except Exception:
@@ -625,8 +640,11 @@ class ComposioService:
f"Inner data keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else type(inner_data).__name__}, "
f"Full inner data: {inner_data}"
)
- return None, f"No file path in Composio response. Keys: {list(data.keys())}, inner: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}"
-
+ return (
+ None,
+ f"No file path in Composio response. Keys: {list(data.keys())}, inner: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}",
+ )
+
return None, f"Unexpected data type from Composio: {type(data).__name__}"
except Exception as e:
@@ -638,14 +656,14 @@ class ComposioService:
) -> tuple[str | None, str | None]:
"""
Get the starting page token for Google Drive change tracking.
-
+
This token represents the current state and is used for future delta syncs.
Per Composio docs: Use GOOGLEDRIVE_GET_CHANGES_START_PAGE_TOKEN to get initial token.
-
+
Args:
connected_account_id: Composio connected account ID.
entity_id: The entity/user ID that owns the connected account.
-
+
Returns:
Tuple of (start_page_token, error message).
"""
@@ -656,27 +674,27 @@ class ComposioService:
params={},
entity_id=entity_id,
)
-
+
if not result.get("success"):
return None, result.get("error", "Unknown error")
-
+
data = result.get("data", {})
# Handle nested response: {data: {startPageToken: ...}, successful: ...}
if isinstance(data, dict):
inner_data = data.get("data", data)
token = (
- inner_data.get("startPageToken") or
- inner_data.get("start_page_token") or
- data.get("startPageToken") or
- data.get("start_page_token")
+ inner_data.get("startPageToken")
+ or inner_data.get("start_page_token")
+ or data.get("startPageToken")
+ or data.get("start_page_token")
)
if token:
logger.info(f"Got Drive start page token: {token}")
return token, None
-
+
logger.warning(f"Could not extract start page token from response: {data}")
return None, "No start page token in response"
-
+
except Exception as e:
logger.error(f"Failed to get Drive start page token: {e!s}")
return None, str(e)
@@ -691,18 +709,18 @@ class ComposioService:
) -> tuple[list[dict[str, Any]], str | None, str | None]:
"""
List changes in Google Drive since the given page token.
-
+
Per Composio docs: GOOGLEDRIVE_LIST_CHANGES tracks modifications to files/folders.
If pageToken is not provided, it auto-fetches the current start page token.
Response includes nextPageToken for pagination and newStartPageToken for future syncs.
-
+
Args:
connected_account_id: Composio connected account ID.
entity_id: The entity/user ID that owns the connected account.
page_token: Page token from previous sync (optional - will auto-fetch if not provided).
page_size: Number of changes per page.
include_removed: Whether to include removed items in the response.
-
+
Returns:
Tuple of (changes list, new_start_page_token, error message).
"""
@@ -713,42 +731,44 @@ class ComposioService:
}
if page_token:
params["pageToken"] = page_token
-
+
result = await self.execute_tool(
connected_account_id=connected_account_id,
tool_name="GOOGLEDRIVE_LIST_CHANGES",
params=params,
entity_id=entity_id,
)
-
+
if not result.get("success"):
return [], None, result.get("error", "Unknown error")
-
+
data = result.get("data", {})
-
+
# Handle nested response structure
changes = []
new_start_token = None
-
+
if isinstance(data, dict):
inner_data = data.get("data", data)
changes = inner_data.get("changes", []) or data.get("changes", [])
-
+
# Get the token for next sync
# newStartPageToken is returned when all changes have been fetched
# nextPageToken is for pagination within the current fetch
new_start_token = (
- inner_data.get("newStartPageToken") or
- inner_data.get("new_start_page_token") or
- inner_data.get("nextPageToken") or
- inner_data.get("next_page_token") or
- data.get("newStartPageToken") or
- data.get("nextPageToken")
+ inner_data.get("newStartPageToken")
+ or inner_data.get("new_start_page_token")
+ or inner_data.get("nextPageToken")
+ or inner_data.get("next_page_token")
+ or data.get("newStartPageToken")
+ or data.get("nextPageToken")
)
-
- logger.info(f"Got {len(changes)} Drive changes, new token: {new_start_token[:20] if new_start_token else 'None'}...")
+
+ logger.info(
+ f"Got {len(changes)} Drive changes, new token: {new_start_token[:20] if new_start_token else 'None'}..."
+ )
return changes, new_start_token, None
-
+
except Exception as e:
logger.error(f"Failed to list Drive changes: {e!s}")
return [], None, str(e)
diff --git a/surfsense_backend/app/services/notification_service.py b/surfsense_backend/app/services/notification_service.py
index 9fcf807e7..04f39d8ef 100644
--- a/surfsense_backend/app/services/notification_service.py
+++ b/surfsense_backend/app/services/notification_service.py
@@ -385,7 +385,9 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
metadata_updates = {
"indexed_count": indexed_count,
- "sync_stage": "completed" if (not error_message or is_warning or indexed_count > 0) else "failed",
+ "sync_stage": "completed"
+ if (not error_message or is_warning or indexed_count > 0)
+ else "failed",
"error_message": error_message,
}
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index ef1f821d2..2365ff984 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -208,7 +208,7 @@ async def index_google_calendar_events(
# Use provided dates (including future dates)
start_date_str = start_date
end_date_str = end_date
-
+
# If start_date and end_date are the same, adjust end_date to be one day later
# to ensure valid date range (start_date must be strictly before end_date)
if start_date_str == end_date_str:
@@ -269,10 +269,14 @@ async def index_google_calendar_events(
# Check if this is an authentication error that requires re-authentication
error_message = error
error_type = "APIError"
- if "re-authenticate" in error.lower() or "expired or been revoked" in error.lower() or "authentication failed" in error.lower():
+ if (
+ "re-authenticate" in error.lower()
+ or "expired or been revoked" in error.lower()
+ or "authentication failed" in error.lower()
+ ):
error_message = "Google Calendar authentication failed. Please re-authenticate."
error_type = "AuthenticationError"
-
+
await task_logger.log_task_failure(
log_entry,
error_message,
@@ -290,7 +294,9 @@ async def index_google_calendar_events(
documents_indexed = 0
documents_skipped = 0
skipped_events = []
- duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
+ duplicate_content_count = (
+ 0 # Track events skipped due to duplicate content_hash
+ )
for event in events:
try:
@@ -417,7 +423,7 @@ async def index_google_calendar_events(
duplicate_by_content = await check_duplicate_document_by_hash(
session, content_hash
)
-
+
if duplicate_by_content:
# A document with the same content already exists (likely from Composio connector)
logger.info(
@@ -528,7 +534,10 @@ async def index_google_calendar_events(
await session.commit()
except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.)
- if "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower():
+ if (
+ "duplicate key value violates unique constraint" in str(e).lower()
+ or "uniqueviolationerror" in str(e).lower()
+ ):
logger.warning(
f"Duplicate content_hash detected during final commit. "
f"This may occur if the same event was indexed by multiple connectors. "
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
index af180c36b..f50e149d3 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
@@ -578,7 +578,7 @@ async def _check_rename_only_update(
- (True, message): Only filename changed, document was updated
- (False, None): Content changed or new file, needs full processing
"""
- from sqlalchemy import cast, select, String
+ from sqlalchemy import String, cast, select
from sqlalchemy.orm.attributes import flag_modified
from app.db import Document
@@ -603,7 +603,8 @@ async def _check_rename_only_update(
select(Document).where(
Document.search_space_id == search_space_id,
Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
- cast(Document.document_metadata["google_drive_file_id"], String) == file_id,
+ cast(Document.document_metadata["google_drive_file_id"], String)
+ == file_id,
)
)
existing_document = result.scalar_one_or_none()
@@ -755,7 +756,7 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:
Handles both new (file_id-based) and legacy (filename-based) hash schemes.
"""
- from sqlalchemy import cast, select, String
+ from sqlalchemy import String, cast, select
from app.db import Document
@@ -774,7 +775,8 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:
select(Document).where(
Document.search_space_id == search_space_id,
Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
- cast(Document.document_metadata["google_drive_file_id"], String) == file_id,
+ cast(Document.document_metadata["google_drive_file_id"], String)
+ == file_id,
)
)
existing_document = result.scalar_one_or_none()
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
index 6a3057437..08d2904d6 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@@ -173,15 +173,16 @@ async def index_google_gmail_messages(
# Check if this is an authentication error that requires re-authentication
error_message = error
error_type = "APIError"
- if "re-authenticate" in error.lower() or "expired or been revoked" in error.lower() or "authentication failed" in error.lower():
+ if (
+ "re-authenticate" in error.lower()
+ or "expired or been revoked" in error.lower()
+ or "authentication failed" in error.lower()
+ ):
error_message = "Gmail authentication failed. Please re-authenticate."
error_type = "AuthenticationError"
-
+
await task_logger.log_task_failure(
- log_entry,
- error_message,
- error,
- {"error_type": error_type}
+ log_entry, error_message, error, {"error_type": error_type}
)
return 0, error_message
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index 68a548409..293d4a243 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -18,7 +18,10 @@ import { ConnectorDialogHeader } from "./connector-popup/components/connector-di
import { ConnectorConnectView } from "./connector-popup/connector-configs/views/connector-connect-view";
import { ConnectorEditView } from "./connector-popup/connector-configs/views/connector-edit-view";
import { IndexingConfigurationView } from "./connector-popup/connector-configs/views/indexing-configuration-view";
-import { COMPOSIO_CONNECTORS, OAUTH_CONNECTORS } from "./connector-popup/constants/connector-constants";
+import {
+ COMPOSIO_CONNECTORS,
+ OAUTH_CONNECTORS,
+} from "./connector-popup/constants/connector-constants";
import { useConnectorDialog } from "./connector-popup/hooks/use-connector-dialog";
import { useIndexingConnectors } from "./connector-popup/hooks/use-indexing-connectors";
import { ActiveConnectorsTab } from "./connector-popup/tabs/active-connectors-tab";
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
index ce5133a9d..6f282d892 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-calendar-config.tsx
@@ -12,4 +12,3 @@ interface ComposioCalendarConfigProps {
export const ComposioCalendarConfig: FC = () => {
return ;
};
-
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
index 0ab0869ff..239125565 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx
@@ -1,6 +1,14 @@
"use client";
-import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation, X } from "lucide-react";
+import {
+ File,
+ FileSpreadsheet,
+ FileText,
+ FolderClosed,
+ Image,
+ Presentation,
+ X,
+} from "lucide-react";
import type { FC } from "react";
import { useEffect, useState } from "react";
import { ComposioDriveFolderTree } from "@/components/connectors/composio-drive-folder-tree";
@@ -85,7 +93,10 @@ function getFileIconFromName(fileName: string, className: string = "size-3.5 shr
return ;
}
-export const ComposioDriveConfig: FC = ({ connector, onConfigChange }) => {
+export const ComposioDriveConfig: FC = ({
+ connector,
+ onConfigChange,
+}) => {
const isIndexable = connector.config?.is_indexable as boolean;
// Initialize with existing selected folders and files from connector config
@@ -184,9 +195,7 @@ export const ComposioDriveConfig: FC = ({ connector, o
);
}
if (selectedFiles.length > 0) {
- parts.push(
- `${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`
- );
+ parts.push(`${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`);
}
return parts.length > 0 ? `(${parts.join(", ")})` : "";
})()}
@@ -329,13 +338,10 @@ export const ComposioDriveConfig: FC = ({ connector, o
- handleIndexingOptionChange("include_subfolders", checked)
- }
+ onCheckedChange={(checked) => handleIndexingOptionChange("include_subfolders", checked)}
/>
);
};
-
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
index 4664e3e64..494e1362f 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-gmail-config.tsx
@@ -12,4 +12,3 @@ interface ComposioGmailConfigProps {
export const ComposioGmailConfig: FC = () => {
return ;
};
-
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx
index b6cfb39ae..383f6ce0e 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx
@@ -1,6 +1,14 @@
"use client";
-import { File, FileSpreadsheet, FileText, FolderClosed, Image, Presentation, X } from "lucide-react";
+import {
+ File,
+ FileSpreadsheet,
+ FileText,
+ FolderClosed,
+ Image,
+ Presentation,
+ X,
+} from "lucide-react";
import type { FC } from "react";
import { useEffect, useState } from "react";
import { GoogleDriveFolderTree } from "@/components/connectors/google-drive-folder-tree";
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
index 8f58db542..5668d398e 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx
@@ -276,7 +276,8 @@ export const ConnectorEditView: FC = ({
Re-indexing runs in the background
- You can continue using SurfSense while we sync your data. Check inbox for updates.
+ You can continue using SurfSense while we sync your data. Check inbox for
+ updates.
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
index 019e6b37f..684f03252 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx
@@ -170,13 +170,13 @@ export const IndexingConfigurationView: FC = ({
{/* Periodic sync - not shown for Google Drive (regular and Composio) */}
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && (
-
- )}
+
+ )}
>
)}
@@ -189,7 +189,8 @@ export const IndexingConfigurationView: FC = ({
Indexing runs in the background
- You can continue using SurfSense while we sync your data. Check inbox for updates.
+ You can continue using SurfSense while we sync your data. Check inbox for
+ updates.