merge: upstream/dev with migration renumbering

2026-06-22 21:28:12 +02:00 · 2026-01-27 11:22:26 +02:00 · 2026-01-27 11:22:26 +02:00 · a7145b2c63
commit a7145b2c63
parent 6091e070f3 2434e64093
176 changed files with 8791 additions and 3608 deletions
--- a/surfsense_backend/Dockerfile
+++ b/surfsense_backend/Dockerfile
@ -19,6 +19,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    libxext6 \
    libxrender1 \
    dos2unix \
+    git \
    && rm -rf /var/lib/apt/lists/*

 # Update certificates and install SSL tools
--- a/surfsense_backend/alembic/versions/79_add_composio_connector_enums.py
+++ b/surfsense_backend/alembic/versions/79_add_composio_connector_enums.py
@ -0,0 +1,95 @@
+"""Add Composio connector types to SearchSourceConnectorType and DocumentType enums
+
+Revision ID: 79
+Revises: 78
+
+This migration adds the Composio connector enum values to both:
+- searchsourceconnectortype (for connector type tracking)
+- documenttype (for document type tracking)
+
+Composio is a managed OAuth integration service that allows connecting
+to various third-party services (Google Drive, Gmail, Calendar, etc.)
+without requiring separate OAuth app verification.
+
+This migration adds three specific connector types:
+- COMPOSIO_GOOGLE_DRIVE_CONNECTOR
+- COMPOSIO_GMAIL_CONNECTOR
+- COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "79"
+down_revision: str | None = "78"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+# Define the ENUM type names and the new values
+CONNECTOR_ENUM = "searchsourceconnectortype"
+CONNECTOR_NEW_VALUES = [
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+    "COMPOSIO_GMAIL_CONNECTOR",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+]
+DOCUMENT_ENUM = "documenttype"
+DOCUMENT_NEW_VALUES = [
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+    "COMPOSIO_GMAIL_CONNECTOR",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+]
+
+
+def upgrade() -> None:
+    """Upgrade schema - add the Composio values to the connector and document enums.
+
+    For every (enum type, label) pair a PL/pgSQL ``DO`` block is executed that
+    looks the label up in ``pg_enum`` and only issues ``ALTER TYPE ... ADD VALUE``
+    when the label is not present yet. The connector enum is processed first,
+    then the document enum.
+    """
+    # Flatten both enums into one ordered work list so a single loop emits the
+    # same statements, in the same order, as handling each enum separately.
+    pending = [(CONNECTOR_ENUM, label) for label in CONNECTOR_NEW_VALUES]
+    pending += [(DOCUMENT_ENUM, label) for label in DOCUMENT_NEW_VALUES]
+
+    for enum_name, label in pending:
+        op.execute(
+            f"""
+            DO $$
+            BEGIN
+                IF NOT EXISTS (
+                    SELECT 1 FROM pg_enum e
+                    JOIN pg_type t ON e.enumtypid = t.oid
+                    WHERE t.typname = '{enum_name}' AND e.enumlabel = '{label}'
+                ) THEN
+                    ALTER TYPE {enum_name} ADD VALUE '{label}';
+                END IF;
+            END$$;
+        """
+        )
+
+
+def downgrade() -> None:
+    """Downgrade schema - intentionally a no-op.
+
+    PostgreSQL does not support removing enum values directly, so the Composio
+    values are left in place. A real rollback would require:
+    1. Deleting every row that uses one of the Composio connector type values
+    2. Creating replacement enums without the Composio connector types
+    3. Altering the affected columns to use the replacement enums
+    4. Dropping the old enums
+
+    That procedure is complex and typically not needed in practice.
+    """
+    return None
--- a/surfsense_backend/alembic/versions/80_add_user_incentive_tasks_table.py
+++ b/surfsense_backend/alembic/versions/80_add_user_incentive_tasks_table.py
@ -0,0 +1,97 @@
+"""Add user incentive tasks table for earning free pages
+
+Revision ID: 80
+Revises: 79
+
+Changes:
+1. Create incentive_task_type enum with GITHUB_STAR value
+2. Create user_incentive_tasks table to track completed tasks
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "80"
+down_revision: str | None = "79"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the incentive tasks infrastructure.
+
+    Creates the ``incentivetasktype`` enum and the ``user_incentive_tasks``
+    table. Each step is skipped when its object already exists, so a partially
+    applied migration can be run again.
+    """
+    conn = op.get_bind()
+
+    # create_type=False keeps op.create_table() from emitting CREATE TYPE on
+    # its own; the type is created explicitly right below. The same object is
+    # reused for the column so the enum is declared in exactly one place.
+    incentive_task_type_enum = postgresql.ENUM(
+        "GITHUB_STAR",
+        name="incentivetasktype",
+        create_type=False,
+    )
+    # checkfirst=True performs the existence check itself, so no separate
+    # pg_type query is needed before calling create().
+    incentive_task_type_enum.create(conn, checkfirst=True)
+
+    # Inspector.has_table() looks in the connection's default schema. A lookup
+    # in information_schema.tables by name alone would also match a table of
+    # the same name in an unrelated schema and wrongly skip the creation.
+    if sa.inspect(conn).has_table("user_incentive_tasks"):
+        return
+
+    op.create_table(
+        "user_incentive_tasks",
+        sa.Column("id", sa.Integer(), primary_key=True, index=True),
+        sa.Column(
+            "user_id",
+            sa.UUID(as_uuid=True),
+            sa.ForeignKey("user.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column(
+            "task_type",
+            incentive_task_type_enum,
+            nullable=False,
+            index=True,
+        ),
+        sa.Column("pages_awarded", sa.Integer(), nullable=False),
+        sa.Column(
+            "completed_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+            index=True,
+        ),
+        # One row per (user, task type): a task can be completed only once.
+        sa.UniqueConstraint("user_id", "task_type", name="uq_user_incentive_task"),
+    )
+
+
+def downgrade() -> None:
+    """Remove the incentive tasks infrastructure."""
+    # The table goes first because its task_type column uses the enum type.
+    op.drop_table("user_incentive_tasks")
+
+    # checkfirst=True only emits DROP TYPE when the type is actually present.
+    incentive_task_type = postgresql.ENUM(name="incentivetasktype")
+    bind = op.get_bind()
+    incentive_task_type.drop(bind, checkfirst=True)
--- a/surfsense_backend/alembic/versions/81_add_public_share_to_chat_threads.py
+++ b/surfsense_backend/alembic/versions/81_add_public_share_to_chat_threads.py
@ -1,7 +1,7 @@
 """Add public sharing columns to new_chat_threads

-Revision ID: 79
-Revises: 78
+Revision ID: 81
+Revises: 80
 Create Date: 2026-01-23

 Adds public_share_token and public_share_enabled columns to enable
@ -13,8 +13,8 @@ from collections.abc import Sequence
 from alembic import op

 # revision identifiers, used by Alembic.
-revision: str = "79"
-down_revision: str | None = "78"
+revision: str = "81"
+down_revision: str | None = "80"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None

--- a/surfsense_backend/alembic/versions/82_add_thread_id_to_podcasts.py
+++ b/surfsense_backend/alembic/versions/82_add_thread_id_to_podcasts.py
@ -1,7 +1,7 @@
 """Add thread_id to podcasts

-Revision ID: 80
-Revises: 79
+Revision ID: 82
+Revises: 81
 Create Date: 2026-01-23

 """
@ -10,8 +10,8 @@ from collections.abc import Sequence

 from alembic import op

-revision: str = "80"
-down_revision: str | None = "79"
+revision: str = "82"
+down_revision: str | None = "81"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None

--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@ -7,6 +7,7 @@ via NewLLMConfig.
 """

 from collections.abc import Sequence
+from typing import Any

 from deepagents import create_deep_agent
 from langchain_core.tools import BaseTool
@ -23,6 +24,90 @@ from app.agents.new_chat.system_prompt import (
 from app.agents.new_chat.tools.registry import build_tools_async
 from app.services.connector_service import ConnectorService

+# =============================================================================
+# Connector Type Mapping
+# =============================================================================
+
+# Maps SearchSourceConnectorType enum values to the searchable document/connector types
+# used by the knowledge_base tool. Some connectors map to different document types.
+_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
+    # Direct mappings (connector type == searchable type)
+    "TAVILY_API": "TAVILY_API",
+    "SEARXNG_API": "SEARXNG_API",
+    "LINKUP_API": "LINKUP_API",
+    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
+    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
+    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
+    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
+    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
+    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
+    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
+    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
+    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
+    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
+    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
+    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
+    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
+    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
+    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
+    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
+    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
+    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
+    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
+    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
+    # Composio connectors
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+    "COMPOSIO_GMAIL_CONNECTOR": "COMPOSIO_GMAIL_CONNECTOR",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+}
+
+# Document types that don't come from SearchSourceConnector but should always be searchable
+_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
+    "EXTENSION",  # Browser extension data
+    "FILE",  # Uploaded files
+    "NOTE",  # User notes
+    "YOUTUBE_VIDEO",  # YouTube videos
+]
+
+
+def _map_connectors_to_searchable_types(
+    connector_types: list[Any],
+) -> list[str]:
+    """
+    Translate connector types into the type names the knowledge_base tool searches.
+
+    The result starts with the always-available document types
+    (``_ALWAYS_AVAILABLE_DOC_TYPES``), followed by the searchable type of each
+    given connector in input order. Connectors without an entry in
+    ``_CONNECTOR_TYPE_TO_SEARCHABLE`` are left out, and every name appears once.
+
+    Args:
+        connector_types: SearchSourceConnectorType enum members or plain strings
+
+    Returns:
+        Ordered, duplicate-free list of searchable connector/document type strings
+    """
+    # dict keys are unique and keep insertion order, so a single dict serves as
+    # both the "already seen" set and the ordered result.
+    ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)
+
+    for connector_type in connector_types:
+        # Objects exposing .value (enum members) contribute that value;
+        # anything else is stringified.
+        if hasattr(connector_type, "value"):
+            key = connector_type.value
+        else:
+            key = str(connector_type)
+
+        searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(key)
+        if searchable:
+            ordered.setdefault(searchable, None)
+
+    return list(ordered)
+
+
 # =============================================================================
 # Deep Agent Factory
 # =============================================================================
@ -117,6 +202,30 @@ async def create_surfsense_deep_agent(
            additional_tools=[my_custom_tool]
        )
    """
+    # Discover available connectors and document types for this search space
+    # This enables dynamic tool docstrings that inform the LLM about what's actually available
+    available_connectors: list[str] | None = None
+    available_document_types: list[str] | None = None
+
+    try:
+        # Get enabled search source connectors for this search space
+        connector_types = await connector_service.get_available_connectors(
+            search_space_id
+        )
+        if connector_types:
+            # Convert enum values to strings and also include mapped document types
+            available_connectors = _map_connectors_to_searchable_types(connector_types)
+
+        # Get document types that have at least one document indexed
+        available_document_types = await connector_service.get_available_document_types(
+            search_space_id
+        )
+    except Exception as e:
+        # Log but don't fail - fall back to all connectors if discovery fails
+        import logging
+
+        logging.warning(f"Failed to discover available connectors/document types: {e}")
+
    # Build dependencies dict for the tools registry
    dependencies = {
        "search_space_id": search_space_id,
@ -125,6 +234,9 @@ async def create_surfsense_deep_agent(
        "firecrawl_api_key": firecrawl_api_key,
        "user_id": user_id,  # Required for memory tools
        "thread_id": thread_id,  # For podcast tool
+        # Dynamic connector/document type discovery for knowledge base tool
+        "available_connectors": available_connectors,
+        "available_document_types": available_document_types,
    }

    # Build tools using the async registry (includes MCP tools)
--- a/surfsense_backend/app/agents/new_chat/tools/init.py
+++ b/surfsense_backend/app/agents/new_chat/tools/init.py
@ -19,6 +19,7 @@ Available tools:
 # Tool factory exports (for direct use)
 from .display_image import create_display_image_tool
 from .knowledge_base import (
+    CONNECTOR_DESCRIPTIONS,
    create_search_knowledge_base_tool,
    format_documents_for_context,
    search_knowledge_base_async,
@ -40,6 +41,8 @@ from .user_memory import create_recall_memory_tool, create_save_memory_tool
 __all__ = [
    # Registry
    "BUILTIN_TOOLS",
+    # Knowledge base utilities
+    "CONNECTOR_DESCRIPTIONS",
    "ToolDefinition",
    "build_tools",
    # Tool factories
@ -51,7 +54,6 @@ __all__ = [
    "create_scrape_webpage_tool",
    "create_search_knowledge_base_tool",
    "create_search_surfsense_docs_tool",
-    # Knowledge base utilities
    "format_documents_for_context",
    "get_all_tool_names",
    "get_default_enabled_tools",
--- a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
+++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
@ -12,7 +12,8 @@ import json
 from datetime import datetime
 from typing import Any

-from langchain_core.tools import tool
+from langchain_core.tools import StructuredTool
+from pydantic import BaseModel, Field
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.services.connector_service import ConnectorService
@ -22,6 +23,7 @@ from app.services.connector_service import ConnectorService
 # =============================================================================

 # Canonical connector values used internally by ConnectorService
+# Includes all document types and search source connectors
 _ALL_CONNECTORS: list[str] = [
    "EXTENSION",
    "FILE",
@ -50,41 +52,117 @@ _ALL_CONNECTORS: list[str] = [
    "CRAWLED_URL",
    "CIRCLEBACK",
    "OBSIDIAN_CONNECTOR",
+    # Composio connectors
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+    "COMPOSIO_GMAIL_CONNECTOR",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
 ]

+# Human-readable descriptions for each connector type
+# Used for generating dynamic docstrings and informing the LLM
+CONNECTOR_DESCRIPTIONS: dict[str, str] = {
+    "EXTENSION": "Web content saved via SurfSense browser extension (personal browsing history)",
+    "FILE": "User-uploaded documents (PDFs, Word, etc.) (personal files)",
+    "NOTE": "SurfSense Notes (notes created inside SurfSense)",
+    "SLACK_CONNECTOR": "Slack conversations and shared content (personal workspace communications)",
+    "TEAMS_CONNECTOR": "Microsoft Teams messages and conversations (personal Teams communications)",
+    "NOTION_CONNECTOR": "Notion workspace pages and databases (personal knowledge management)",
+    "YOUTUBE_VIDEO": "YouTube video transcripts and metadata (personally saved videos)",
+    "GITHUB_CONNECTOR": "GitHub repository content and issues (personal repositories and interactions)",
+    "ELASTICSEARCH_CONNECTOR": "Elasticsearch indexed documents and data (personal Elasticsearch instances)",
+    "LINEAR_CONNECTOR": "Linear project issues and discussions (personal project management)",
+    "JIRA_CONNECTOR": "Jira project issues, tickets, and comments (personal project tracking)",
+    "CONFLUENCE_CONNECTOR": "Confluence pages and comments (personal project documentation)",
+    "CLICKUP_CONNECTOR": "ClickUp tasks and project data (personal task management)",
+    "GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events, meetings, and schedules (personal calendar)",
+    "GOOGLE_GMAIL_CONNECTOR": "Google Gmail emails and conversations (personal emails)",
+    "GOOGLE_DRIVE_FILE": "Google Drive files and documents (personal cloud storage)",
+    "DISCORD_CONNECTOR": "Discord server conversations and shared content (personal community)",
+    "AIRTABLE_CONNECTOR": "Airtable records, tables, and database content (personal data)",
+    "TAVILY_API": "Tavily web search API results (real-time web search)",
+    "SEARXNG_API": "SearxNG search API results (privacy-focused web search)",
+    "LINKUP_API": "Linkup search API results (web search)",
+    "BAIDU_SEARCH_API": "Baidu search API results (Chinese web search)",
+    "LUMA_CONNECTOR": "Luma events and meetings",
+    "WEBCRAWLER_CONNECTOR": "Webpages indexed by SurfSense (personally selected websites)",
+    "CRAWLED_URL": "Webpages indexed by SurfSense (personally selected websites)",
+    "BOOKSTACK_CONNECTOR": "BookStack pages (personal documentation)",
+    "CIRCLEBACK": "Circleback meeting notes, transcripts, and action items",
+    "OBSIDIAN_CONNECTOR": "Obsidian vault notes and markdown files (personal notes)",
+    # Composio connectors
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "Google Drive files via Composio (personal cloud storage)",
+    "COMPOSIO_GMAIL_CONNECTOR": "Gmail emails via Composio (personal emails)",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events via Composio (personal calendar)",
+}

-def _normalize_connectors(connectors_to_search: list[str] | None) -> list[str]:
+
+def _normalize_connectors(
+    connectors_to_search: list[str] | None,
+    available_connectors: list[str] | None = None,
+) -> list[str]:
    """
    Normalize connectors provided by the model.

    - Accepts user-facing enums like WEBCRAWLER_CONNECTOR and maps them to canonical
      ConnectorService types.
    - Drops unknown values.
-    - If None/empty, defaults to searching across all known connectors.
+    - If available_connectors is provided, only includes connectors from that list.
+    - If connectors_to_search is None/empty, defaults to available_connectors or all.
+
+    Args:
+        connectors_to_search: List of connectors requested by the model
+        available_connectors: List of connectors actually available in the search space
+
+    Returns:
+        List of normalized connector strings to search
    """
+    # Determine the set of valid connectors to consider
+    valid_set = (
+        set(available_connectors) if available_connectors else set(_ALL_CONNECTORS)
+    )
+
    if not connectors_to_search:
-        return list(_ALL_CONNECTORS)
+        # Search all available connectors if none specified
+        return (
+            list(available_connectors)
+            if available_connectors
+            else list(_ALL_CONNECTORS)
+        )

    normalized: list[str] = []
    for raw in connectors_to_search:
        c = (raw or "").strip().upper()
        if not c:
            continue
+        # Map user-facing aliases to canonical names
        if c == "WEBCRAWLER_CONNECTOR":
            c = "CRAWLED_URL"
        normalized.append(c)

-    # de-dupe while preserving order + filter unknown
+    # de-dupe while preserving order + filter to valid connectors
    seen: set[str] = set()
    out: list[str] = []
    for c in normalized:
        if c in seen:
            continue
+        # Only include if it's a known connector AND available
        if c not in _ALL_CONNECTORS:
            continue
+        if c not in valid_set:
+            continue
        seen.add(c)
        out.append(c)
-    return out if out else list(_ALL_CONNECTORS)
+
+    # Fallback to all available if nothing matched
+    return (
+        out
+        if out
+        else (
+            list(available_connectors)
+            if available_connectors
+            else list(_ALL_CONNECTORS)
+        )
+    )


 # =============================================================================
@ -233,6 +311,7 @@ async def search_knowledge_base_async(
    top_k: int = 10,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
+    available_connectors: list[str] | None = None,
 ) -> str:
    """
    Search the user's knowledge base for relevant documents.
@ -248,6 +327,8 @@ async def search_knowledge_base_async(
        top_k: Number of results per connector
        start_date: Optional start datetime (UTC) for filtering documents
        end_date: Optional end datetime (UTC) for filtering documents
+        available_connectors: Optional list of connectors actually available in the search space.
+                            If provided, only these connectors will be searched.

    Returns:
        Formatted string with search results
@ -262,7 +343,7 @@ async def search_knowledge_base_async(
        end_date=end_date,
    )

-    connectors = _normalize_connectors(connectors_to_search)
+    connectors = _normalize_connectors(connectors_to_search, available_connectors)

    for connector in connectors:
        try:
@ -316,6 +397,16 @@ async def search_knowledge_base_async(
                )
                all_documents.extend(chunks)

+            elif connector == "TEAMS_CONNECTOR":
+                _, chunks = await connector_service.search_teams(
+                    user_query=query,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
            elif connector == "NOTION_CONNECTOR":
                _, chunks = await connector_service.search_notion(
                    user_query=query,
@ -519,6 +610,39 @@ async def search_knowledge_base_async(
                )
                all_documents.extend(chunks)

+            # =========================================================
+            # Composio Connectors
+            # =========================================================
+            elif connector == "COMPOSIO_GOOGLE_DRIVE_CONNECTOR":
+                _, chunks = await connector_service.search_composio_google_drive(
+                    user_query=query,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
+            elif connector == "COMPOSIO_GMAIL_CONNECTOR":
+                _, chunks = await connector_service.search_composio_gmail(
+                    user_query=query,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
+            elif connector == "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR":
+                _, chunks = await connector_service.search_composio_google_calendar(
+                    user_query=query,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
        except Exception as e:
            print(f"Error searching connector {connector}: {e}")
            continue
@ -543,11 +667,68 @@ async def search_knowledge_base_async(
    return format_documents_for_context(deduplicated)


+def _build_connector_docstring(available_connectors: list[str] | None) -> str:
+    """
+    Render the bullet list of connector enums embedded in the tool description.
+
+    Args:
+        available_connectors: Connector types to list; ``None`` or an empty list
+            means every entry of ``_ALL_CONNECTORS`` is listed.
+
+    Returns:
+        One ``- NAME: description`` line per connector, joined by newlines
+    """
+    names = available_connectors or list(_ALL_CONNECTORS)
+
+    def _bullet(name: str) -> str:
+        # The description is always looked up under the given name (falling back
+        # to the name itself); only the displayed label differs for CRAWLED_URL,
+        # which is shown under its user-facing alias WEBCRAWLER_CONNECTOR.
+        label = "WEBCRAWLER_CONNECTOR" if name == "CRAWLED_URL" else name
+        return f"- {label}: {CONNECTOR_DESCRIPTIONS.get(name, name)}"
+
+    return "\n".join(_bullet(name) for name in names)
+
+
+# =============================================================================
+# Tool Input Schema
+# =============================================================================
+
+
+class SearchKnowledgeBaseInput(BaseModel):
+    """Input schema for the search_knowledge_base tool.
+
+    Only ``query`` is required; every other field has a default.
+    """
+
+    # Required free-text search query.
+    query: str = Field(
+        description="The search query - be specific and include key terms"
+    )
+    # Result count; defaults to 10 when the caller does not set it.
+    top_k: int = Field(
+        default=10,
+        description="Number of results to retrieve (default: 10)",
+    )
+    # Date bounds are carried as strings in this schema.
+    # NOTE(review): presumably parsed into datetimes by the tool implementation - confirm.
+    start_date: str | None = Field(
+        default=None,
+        description="Optional ISO date/datetime (e.g. '2025-12-12' or '2025-12-12T00:00:00+00:00')",
+    )
+    end_date: str | None = Field(
+        default=None,
+        description="Optional ISO date/datetime (e.g. '2025-12-19' or '2025-12-19T23:59:59+00:00')",
+    )
+    # None (the default) means the caller did not restrict the connectors.
+    connectors_to_search: list[str] | None = Field(
+        default=None,
+        description="Optional list of connector enums to search. If omitted, searches all available.",
+    )
+
+
 def create_search_knowledge_base_tool(
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
-):
+    available_connectors: list[str] | None = None,
+    available_document_types: list[str] | None = None,
+) -> StructuredTool:
    """
    Factory function to create the search_knowledge_base tool with injected dependencies.

@ -555,72 +736,57 @@ def create_search_knowledge_base_tool(
        search_space_id: The user's search space ID
        db_session: Database session
        connector_service: Initialized connector service
+        available_connectors: Optional list of connector types available in the search space.
+                            Used to dynamically generate the tool docstring.
+        available_document_types: Optional list of document types that have data in the search space.
+                                Used to inform the LLM about what data exists.

    Returns:
-        A configured tool function
+        A configured StructuredTool instance
    """
+    # Build connector documentation dynamically
+    connector_docs = _build_connector_docstring(available_connectors)

-    @tool
-    async def search_knowledge_base(
+    # Build context about available document types
+    doc_types_info = ""
+    if available_document_types:
+        doc_types_info = f"""
+
+## Document types with indexed content in this search space
+
+The following document types have content available for search:
+{", ".join(available_document_types)}
+
+Focus searches on these types for best results."""
+
+    # Build the dynamic description for the tool
+    # This is what the LLM sees when deciding whether/how to use the tool
+    dynamic_description = f"""Search the user's personal knowledge base for relevant information.
+
+Use this tool to find documents, notes, files, web pages, and other content that may help answer the user's question.
+
+IMPORTANT:
+- If the user requests a specific source type (e.g. "my notes", "Slack messages"), pass `connectors_to_search=[...]` using the enums below.
+- If `connectors_to_search` is omitted/empty, the system will search broadly.
+- Only connectors that are enabled/configured for this search space are available.{doc_types_info}
+
+## Available connector enums for `connectors_to_search`
+
+{connector_docs}
+
+NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`."""
+
+    # Capture for closure
+    _available_connectors = available_connectors
+
+    async def _search_knowledge_base_impl(
        query: str,
        top_k: int = 10,
        start_date: str | None = None,
        end_date: str | None = None,
        connectors_to_search: list[str] | None = None,
    ) -> str:
-        """
-        Search the user's personal knowledge base for relevant information.
-
-        Use this tool to find documents, notes, files, web pages, and other content
-        that may help answer the user's question.
-
-        IMPORTANT:
-        - If the user requests a specific source type (e.g. "my notes", "Slack messages"),
-          pass `connectors_to_search=[...]` using the enums below.
-        - If `connectors_to_search` is omitted/empty, the system will search broadly.
-
-        ## Available connector enums for `connectors_to_search`
-
-        - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history)
-        - FILE: "User-uploaded documents (PDFs, Word, etc.)" (personal files)
-        - NOTE: "SurfSense Notes" (notes created inside SurfSense)
-        - SLACK_CONNECTOR: "Slack conversations and shared content" (personal workspace communications)
-        - TEAMS_CONNECTOR: "Microsoft Teams messages and conversations" (personal Teams communications)
-        - NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management)
-        - YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos)
-        - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
-        - ELASTICSEARCH_CONNECTOR: "Elasticsearch indexed documents and data" (personal Elasticsearch instances and custom data sources)
-        - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
-        - JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking)
-        - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation)
-        - CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management)
-        - GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management)
-        - GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications)
-        - GOOGLE_DRIVE_FILE: "Google Drive files and documents" (personal cloud storage and file management)
-        - DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications)
-        - AIRTABLE_CONNECTOR: "Airtable records, tables, and database content" (personal data management and organization)
-        - TAVILY_API: "Tavily search API results" (personalized search results)
-        - SEARXNG_API: "SearxNG search API results" (personalized search results)
-        - LINKUP_API: "Linkup search API results" (personalized search results)
-        - BAIDU_SEARCH_API: "Baidu search API results" (personalized search results)
-        - LUMA_CONNECTOR: "Luma events"
-        - WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites)
-        - BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation)
-        - CIRCLEBACK: "Circleback meeting notes, transcripts, and action items" (personal meeting records)
-        - OBSIDIAN_CONNECTOR: "Obsidian vault notes and markdown files" (personal notes and knowledge management)
-
-        NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`.
-
-        Args:
-            query: The search query - be specific and include key terms
-            top_k: Number of results to retrieve (default: 10)
-            start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00")
-            end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00")
-            connectors_to_search: Optional list of connector enums to search. If omitted, searches all.
-
-        Returns:
-            Formatted string with relevant documents and their content
-        """
+        """Implementation function for knowledge base search."""
        from app.agents.new_chat.utils import parse_date_or_datetime

        parsed_start: datetime | None = None
@ -640,6 +806,16 @@ def create_search_knowledge_base_tool(
            top_k=top_k,
            start_date=parsed_start,
            end_date=parsed_end,
+            available_connectors=_available_connectors,
        )

-    return search_knowledge_base
+    # Create StructuredTool with dynamic description
+    # This properly sets the description that the LLM sees
+    tool = StructuredTool(
+        name="search_knowledge_base",
+        description=dynamic_description,
+        coroutine=_search_knowledge_base_impl,
+        args_schema=SearchKnowledgeBaseInput,
+    )
+
+    return tool
--- a/surfsense_backend/app/agents/new_chat/tools/registry.py
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@ -85,6 +85,7 @@ class ToolDefinition:
 # Contributors: Add your new tools here!
 BUILTIN_TOOLS: list[ToolDefinition] = [
    # Core tool - searches the user's knowledge base
+    # Now supports dynamic connector/document type discovery
    ToolDefinition(
        name="search_knowledge_base",
        description="Search the user's personal knowledge base for relevant information",
@ -92,8 +93,12 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
            connector_service=deps["connector_service"],
+            # Optional: dynamically discovered connectors/document types
+            available_connectors=deps.get("available_connectors"),
+            available_document_types=deps.get("available_document_types"),
        ),
        requires=["search_space_id", "db_session", "connector_service"],
+        # Note: available_connectors and available_document_types are optional
    ),
    # Podcast generation tool
    ToolDefinition(
--- a/surfsense_backend/app/connectors/composio_connector.py
+++ b/surfsense_backend/app/connectors/composio_connector.py
@ -1,7 +1,7 @@
 """
-Composio Connector Module.
+Composio Connector Base Module.

-Provides a unified interface for interacting with various services via Composio,
+Provides a base class for interacting with various services via Composio,
 primarily used during indexing operations.
 """

@ -19,10 +19,10 @@ logger = logging.getLogger(__name__)

 class ComposioConnector:
    """
-    Generic Composio connector for data retrieval.
+    Base Composio connector for data retrieval.

    Wraps the ComposioService to provide toolkit-specific data access
-    for indexing operations.
+    for indexing operations. Subclasses implement toolkit-specific methods.
    """

    def __init__(
@ -89,302 +89,12 @@ class ComposioConnector:
        toolkit_id = await self.get_toolkit_id()
        return toolkit_id in INDEXABLE_TOOLKITS

-    # ===== Google Drive Methods =====
+    @property
+    def session(self) -> AsyncSession:
+        """Get the database session."""
+        return self._session

-    async def list_drive_files(
-        self,
-        folder_id: str | None = None,
-        page_token: str | None = None,
-        page_size: int = 100,
-    ) -> tuple[list[dict[str, Any]], str | None, str | None]:
-        """
-        List files from Google Drive via Composio.
-
-        Args:
-            folder_id: Optional folder ID to list contents of.
-            page_token: Pagination token.
-            page_size: Number of files per page.
-
-        Returns:
-            Tuple of (files list, next_page_token, error message).
-        """
-        connected_account_id = await self.get_connected_account_id()
-        if not connected_account_id:
-            return [], None, "No connected account ID found"
-
-        entity_id = await self.get_entity_id()
-        service = await self._get_service()
-        return await service.get_drive_files(
-            connected_account_id=connected_account_id,
-            entity_id=entity_id,
-            folder_id=folder_id,
-            page_token=page_token,
-            page_size=page_size,
-        )
-
-    async def get_drive_file_content(
-        self, file_id: str
-    ) -> tuple[bytes | None, str | None]:
-        """
-        Download file content from Google Drive via Composio.
-
-        Args:
-            file_id: Google Drive file ID.
-
-        Returns:
-            Tuple of (file content bytes, error message).
-        """
-        connected_account_id = await self.get_connected_account_id()
-        if not connected_account_id:
-            return None, "No connected account ID found"
-
-        entity_id = await self.get_entity_id()
-        service = await self._get_service()
-        return await service.get_drive_file_content(
-            connected_account_id=connected_account_id,
-            entity_id=entity_id,
-            file_id=file_id,
-        )
-
-    # ===== Gmail Methods =====
-
-    async def list_gmail_messages(
-        self,
-        query: str = "",
-        max_results: int = 100,
-    ) -> tuple[list[dict[str, Any]], str | None]:
-        """
-        List Gmail messages via Composio.
-
-        Args:
-            query: Gmail search query.
-            max_results: Maximum number of messages.
-
-        Returns:
-            Tuple of (messages list, error message).
-        """
-        connected_account_id = await self.get_connected_account_id()
-        if not connected_account_id:
-            return [], "No connected account ID found"
-
-        entity_id = await self.get_entity_id()
-        service = await self._get_service()
-        return await service.get_gmail_messages(
-            connected_account_id=connected_account_id,
-            entity_id=entity_id,
-            query=query,
-            max_results=max_results,
-        )
-
-    async def get_gmail_message_detail(
-        self, message_id: str
-    ) -> tuple[dict[str, Any] | None, str | None]:
-        """
-        Get full details of a Gmail message via Composio.
-
-        Args:
-            message_id: Gmail message ID.
-
-        Returns:
-            Tuple of (message details, error message).
-        """
-        connected_account_id = await self.get_connected_account_id()
-        if not connected_account_id:
-            return None, "No connected account ID found"
-
-        entity_id = await self.get_entity_id()
-        service = await self._get_service()
-        return await service.get_gmail_message_detail(
-            connected_account_id=connected_account_id,
-            entity_id=entity_id,
-            message_id=message_id,
-        )
-
-    # ===== Google Calendar Methods =====
-
-    async def list_calendar_events(
-        self,
-        time_min: str | None = None,
-        time_max: str | None = None,
-        max_results: int = 250,
-    ) -> tuple[list[dict[str, Any]], str | None]:
-        """
-        List Google Calendar events via Composio.
-
-        Args:
-            time_min: Start time (RFC3339 format).
-            time_max: End time (RFC3339 format).
-            max_results: Maximum number of events.
-
-        Returns:
-            Tuple of (events list, error message).
-        """
-        connected_account_id = await self.get_connected_account_id()
-        if not connected_account_id:
-            return [], "No connected account ID found"
-
-        entity_id = await self.get_entity_id()
-        service = await self._get_service()
-        return await service.get_calendar_events(
-            connected_account_id=connected_account_id,
-            entity_id=entity_id,
-            time_min=time_min,
-            time_max=time_max,
-            max_results=max_results,
-        )
-
-    # ===== Utility Methods =====
-
-    def format_gmail_message_to_markdown(self, message: dict[str, Any]) -> str:
-        """
-        Format a Gmail message to markdown.
-
-        Args:
-            message: Message object from Composio's GMAIL_FETCH_EMAILS response.
-                    Composio structure: messageId, messageText, messageTimestamp,
-                    payload.headers, labelIds, attachmentList
-
-        Returns:
-            Formatted markdown string.
-        """
-        try:
-            # Composio uses 'messageId' (camelCase)
-            message_id = message.get("messageId", "") or message.get("id", "")
-            label_ids = message.get("labelIds", [])
-
-            # Extract headers from payload
-            payload = message.get("payload", {})
-            headers = payload.get("headers", [])
-
-            # Parse headers into a dict
-            header_dict = {}
-            for header in headers:
-                name = header.get("name", "").lower()
-                value = header.get("value", "")
-                header_dict[name] = value
-
-            # Extract key information
-            subject = header_dict.get("subject", "No Subject")
-            from_email = header_dict.get("from", "Unknown Sender")
-            to_email = header_dict.get("to", "Unknown Recipient")
-            # Composio provides messageTimestamp directly
-            date_str = message.get("messageTimestamp", "") or header_dict.get(
-                "date", "Unknown Date"
-            )
-
-            # Build markdown content
-            markdown_content = f"# {subject}\n\n"
-            markdown_content += f"**From:** {from_email}\n"
-            markdown_content += f"**To:** {to_email}\n"
-            markdown_content += f"**Date:** {date_str}\n"
-
-            if label_ids:
-                markdown_content += f"**Labels:** {', '.join(label_ids)}\n"
-
-            markdown_content += "\n---\n\n"
-
-            # Composio provides full message text in 'messageText'
-            message_text = message.get("messageText", "")
-            if message_text:
-                markdown_content += f"## Content\n\n{message_text}\n\n"
-            else:
-                # Fallback to snippet if no messageText
-                snippet = message.get("snippet", "")
-                if snippet:
-                    markdown_content += f"## Preview\n\n{snippet}\n\n"
-
-            # Add attachment info if present
-            attachments = message.get("attachmentList", [])
-            if attachments:
-                markdown_content += "## Attachments\n\n"
-                for att in attachments:
-                    att_name = att.get("filename", att.get("name", "Unknown"))
-                    markdown_content += f"- {att_name}\n"
-                markdown_content += "\n"
-
-            # Add message metadata
-            markdown_content += "## Message Details\n\n"
-            markdown_content += f"- **Message ID:** {message_id}\n"
-
-            return markdown_content
-
-        except Exception as e:
-            return f"Error formatting message to markdown: {e!s}"
-
-    def format_calendar_event_to_markdown(self, event: dict[str, Any]) -> str:
-        """
-        Format a Google Calendar event to markdown.
-
-        Args:
-            event: Event object from Google Calendar API.
-
-        Returns:
-            Formatted markdown string.
-        """
-        from datetime import datetime
-
-        try:
-            # Extract basic event information
-            summary = event.get("summary", "No Title")
-            description = event.get("description", "")
-            location = event.get("location", "")
-
-            # Extract start and end times
-            start = event.get("start", {})
-            end = event.get("end", {})
-
-            start_time = start.get("dateTime") or start.get("date", "")
-            end_time = end.get("dateTime") or end.get("date", "")
-
-            # Format times for display
-            def format_time(time_str: str) -> str:
-                if not time_str:
-                    return "Unknown"
-                try:
-                    if "T" in time_str:
-                        dt = datetime.fromisoformat(time_str.replace("Z", "+00:00"))
-                        return dt.strftime("%Y-%m-%d %H:%M")
-                    return time_str
-                except Exception:
-                    return time_str
-
-            start_formatted = format_time(start_time)
-            end_formatted = format_time(end_time)
-
-            # Extract attendees
-            attendees = event.get("attendees", [])
-            attendee_list = []
-            for attendee in attendees:
-                email = attendee.get("email", "")
-                display_name = attendee.get("displayName", email)
-                response_status = attendee.get("responseStatus", "")
-                attendee_list.append(f"- {display_name} ({response_status})")
-
-            # Build markdown content
-            markdown_content = f"# {summary}\n\n"
-            markdown_content += f"**Start:** {start_formatted}\n"
-            markdown_content += f"**End:** {end_formatted}\n"
-
-            if location:
-                markdown_content += f"**Location:** {location}\n"
-
-            markdown_content += "\n"
-
-            if description:
-                markdown_content += f"## Description\n\n{description}\n\n"
-
-            if attendee_list:
-                markdown_content += "## Attendees\n\n"
-                markdown_content += "\n".join(attendee_list)
-                markdown_content += "\n\n"
-
-            # Add event metadata
-            markdown_content += "## Event Details\n\n"
-            markdown_content += f"- **Event ID:** {event.get('id', 'Unknown')}\n"
-            markdown_content += f"- **Created:** {event.get('created', 'Unknown')}\n"
-            markdown_content += f"- **Updated:** {event.get('updated', 'Unknown')}\n"
-
-            return markdown_content
-
-        except Exception as e:
-            return f"Error formatting event to markdown: {e!s}"
+    @property
+    def connector_id(self) -> int:
+        """Get the connector ID."""
+        return self._connector_id
--- a/surfsense_backend/app/connectors/composio_gmail_connector.py
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@ -0,0 +1,613 @@
+"""
+Composio Gmail Connector Module.
+
+Provides Gmail specific methods for data retrieval and indexing via Composio.
+"""
+
+import logging
+from datetime import UTC, datetime
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
+from sqlalchemy.orm import selectinload
+
+from app.config import config
+from app.connectors.composio_connector import ComposioConnector
+from app.db import Document, DocumentType
+from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
+from app.services.llm_service import get_user_long_context_llm
+from app.services.task_logging_service import TaskLoggingService
+from app.tasks.connector_indexers.base import calculate_date_range
+from app.utils.document_converters import (
+    create_document_chunks,
+    generate_content_hash,
+    generate_document_summary,
+    generate_unique_identifier_hash,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def get_current_timestamp() -> datetime:
+    """Get the current timestamp with timezone for updated_at field."""
+    return datetime.now(UTC)
+
+
+async def check_document_by_unique_identifier(
+    session: AsyncSession, unique_identifier_hash: str
+) -> Document | None:
+    """Check if a document with the given unique identifier hash already exists."""
+    existing_doc_result = await session.execute(
+        select(Document)
+        .options(selectinload(Document.chunks))
+        .where(Document.unique_identifier_hash == unique_identifier_hash)
+    )
+    return existing_doc_result.scalars().first()
+
+
+async def update_connector_last_indexed(
+    session: AsyncSession,
+    connector,
+    update_last_indexed: bool = True,
+) -> None:
+    """Stamp connector.last_indexed_at with the current UTC time if requested."""
+    if update_last_indexed:
+        connector.last_indexed_at = datetime.now(UTC)
+        logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
+
+
+class ComposioGmailConnector(ComposioConnector):
+    """
+    Gmail specific Composio connector.
+
+    Provides methods for listing Gmail messages, fetching message details, and
+    formatting messages as markdown, all via Composio.
+    """
+
+    async def list_gmail_messages(
+        self,
+        query: str = "",
+        max_results: int = 50,
+        page_token: str | None = None,
+    ) -> tuple[list[dict[str, Any]], str | None, int | None, str | None]:
+        """
+        List Gmail messages via Composio with pagination support.
+
+        Args:
+            query: Gmail search query.
+            max_results: Maximum number of messages per page (default: 50).
+            page_token: Optional pagination token for next page.
+
+        Returns:
+            Tuple of (messages list, next_page_token, result_size_estimate, error message).
+        """
+        connected_account_id = await self.get_connected_account_id()
+        if not connected_account_id:
+            return [], None, None, "No connected account ID found"
+
+        entity_id = await self.get_entity_id()
+        service = await self._get_service()
+        return await service.get_gmail_messages(
+            connected_account_id=connected_account_id,
+            entity_id=entity_id,
+            query=query,
+            max_results=max_results,
+            page_token=page_token,
+        )
+
+    async def get_gmail_message_detail(
+        self, message_id: str
+    ) -> tuple[dict[str, Any] | None, str | None]:
+        """
+        Get full details of a Gmail message via Composio.
+
+        Args:
+            message_id: Gmail message ID.
+
+        Returns:
+            Tuple of (message details, error message).
+        """
+        connected_account_id = await self.get_connected_account_id()
+        if not connected_account_id:
+            return None, "No connected account ID found"
+
+        entity_id = await self.get_entity_id()
+        service = await self._get_service()
+        return await service.get_gmail_message_detail(
+            connected_account_id=connected_account_id,
+            entity_id=entity_id,
+            message_id=message_id,
+        )
+
+    def format_gmail_message_to_markdown(self, message: dict[str, Any]) -> str:
+        """
+        Format a Gmail message to markdown.
+
+        Args:
+            message: Message object from Composio's GMAIL_FETCH_EMAILS response.
+                    Composio structure: messageId, messageText, messageTimestamp,
+                    payload.headers, labelIds, attachmentList
+
+        Returns:
+            Formatted markdown string.
+        """
+        try:
+            # Composio uses 'messageId' (camelCase)
+            message_id = message.get("messageId", "") or message.get("id", "")
+            label_ids = message.get("labelIds", [])
+
+            # Extract headers from payload
+            payload = message.get("payload", {})
+            headers = payload.get("headers", [])
+
+            # Parse headers into a dict
+            header_dict = {}
+            for header in headers:
+                name = header.get("name", "").lower()
+                value = header.get("value", "")
+                header_dict[name] = value
+
+            # Extract key information
+            subject = header_dict.get("subject", "No Subject")
+            from_email = header_dict.get("from", "Unknown Sender")
+            to_email = header_dict.get("to", "Unknown Recipient")
+            # Composio provides messageTimestamp directly
+            date_str = message.get("messageTimestamp", "") or header_dict.get(
+                "date", "Unknown Date"
+            )
+
+            # Build markdown content
+            markdown_content = f"# {subject}\n\n"
+            markdown_content += f"**From:** {from_email}\n"
+            markdown_content += f"**To:** {to_email}\n"
+            markdown_content += f"**Date:** {date_str}\n"
+
+            if label_ids:
+                markdown_content += f"**Labels:** {', '.join(label_ids)}\n"
+
+            markdown_content += "\n---\n\n"
+
+            # Composio provides full message text in 'messageText'
+            message_text = message.get("messageText", "")
+            if message_text:
+                markdown_content += f"## Content\n\n{message_text}\n\n"
+            else:
+                # Fallback to snippet if no messageText
+                snippet = message.get("snippet", "")
+                if snippet:
+                    markdown_content += f"## Preview\n\n{snippet}\n\n"
+
+            # Add attachment info if present
+            attachments = message.get("attachmentList", [])
+            if attachments:
+                markdown_content += "## Attachments\n\n"
+                for att in attachments:
+                    att_name = att.get("filename", att.get("name", "Unknown"))
+                    markdown_content += f"- {att_name}\n"
+                markdown_content += "\n"
+
+            # Add message metadata
+            markdown_content += "## Message Details\n\n"
+            markdown_content += f"- **Message ID:** {message_id}\n"
+
+            return markdown_content
+
+        except Exception as e:
+            return f"Error formatting message to markdown: {e!s}"
+
+
+# ============ Indexer Functions ============
+
+
+async def _process_gmail_message_batch(
+    session: AsyncSession,
+    messages: list[dict[str, Any]],
+    composio_connector: ComposioGmailConnector,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    total_documents_indexed: int = 0,
+) -> tuple[int, int]:
+    """
+    Process a batch of Gmail messages and index them.
+
+    Args:
+        total_documents_indexed: Running total of documents indexed so far (for batch commits).
+
+    Returns:
+        Tuple of (documents_indexed, documents_skipped)
+    """
+    documents_indexed = 0
+    documents_skipped = 0
+
+    for message in messages:
+        try:
+            # Composio uses 'messageId' (camelCase); fall back to 'id' if it is missing
+            message_id = message.get("messageId", "") or message.get("id", "")
+            if not message_id:
+                documents_skipped += 1
+                continue
+
+            # Composio's GMAIL_FETCH_EMAILS already returns full message content
+            # No need for a separate detail API call
+
+            # Extract message info from Composio response
+            # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
+            payload = message.get("payload", {})
+            headers = payload.get("headers", [])
+
+            subject = "No Subject"
+            sender = "Unknown Sender"
+            date_str = message.get("messageTimestamp", "Unknown Date")
+
+            for header in headers:
+                name = header.get("name", "").lower()
+                value = header.get("value", "")
+                if name == "subject":
+                    subject = value
+                elif name == "from":
+                    sender = value
+                elif name == "date":
+                    date_str = value
+
+            # Format to markdown using the full message data
+            markdown_content = composio_connector.format_gmail_message_to_markdown(
+                message
+            )
+
+            # Check for empty content (defensive parsing per Composio best practices)
+            if not markdown_content.strip():
+                logger.warning(f"Skipping Gmail message with no content: {subject}")
+                documents_skipped += 1
+                continue
+
+            # Generate unique identifier
+            document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"])
+            unique_identifier_hash = generate_unique_identifier_hash(
+                document_type, f"gmail_{message_id}", search_space_id
+            )
+
+            content_hash = generate_content_hash(markdown_content, search_space_id)
+
+            existing_document = await check_document_by_unique_identifier(
+                session, unique_identifier_hash
+            )
+
+            # Get label IDs from Composio response
+            label_ids = message.get("labelIds", [])
+            # Extract thread_id if available (for consistency with non-Composio implementation)
+            thread_id = message.get("threadId", "") or message.get("thread_id", "")
+
+            if existing_document:
+                if existing_document.content_hash == content_hash:
+                    documents_skipped += 1
+                    continue
+
+                # Update existing
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata = {
+                        "message_id": message_id,
+                        "thread_id": thread_id,
+                        "subject": subject,
+                        "sender": sender,
+                        "document_type": "Gmail Message (Composio)",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        markdown_content, user_llm, document_metadata
+                    )
+                else:
+                    summary_content = (
+                        f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
+                    )
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(markdown_content)
+
+                existing_document.title = f"Gmail: {subject}"
+                existing_document.content = summary_content
+                existing_document.content_hash = content_hash
+                existing_document.embedding = summary_embedding
+                existing_document.document_metadata = {
+                    "message_id": message_id,
+                    "thread_id": thread_id,
+                    "subject": subject,
+                    "sender": sender,
+                    "date": date_str,
+                    "labels": label_ids,
+                    "connector_id": connector_id,
+                    "source": "composio",
+                }
+                existing_document.chunks = chunks
+                existing_document.updated_at = get_current_timestamp()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents
+                current_total = total_documents_indexed + documents_indexed
+                if current_total % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {current_total} Gmail messages processed so far"
+                    )
+                    await session.commit()
+                continue
+
+            # Create new document
+            user_llm = await get_user_long_context_llm(
+                session, user_id, search_space_id
+            )
+
+            if user_llm:
+                document_metadata = {
+                    "message_id": message_id,
+                    "thread_id": thread_id,
+                    "subject": subject,
+                    "sender": sender,
+                    "document_type": "Gmail Message (Composio)",
+                }
+                summary_content, summary_embedding = await generate_document_summary(
+                    markdown_content, user_llm, document_metadata
+                )
+            else:
+                summary_content = (
+                    f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
+                )
+                summary_embedding = config.embedding_model_instance.embed(
+                    summary_content
+                )
+
+            chunks = await create_document_chunks(markdown_content)
+
+            document = Document(
+                search_space_id=search_space_id,
+                title=f"Gmail: {subject}",
+                document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
+                document_metadata={
+                    "message_id": message_id,
+                    "thread_id": thread_id,
+                    "subject": subject,
+                    "sender": sender,
+                    "date": date_str,
+                    "labels": label_ids,
+                    "connector_id": connector_id,
+                    "toolkit_id": "gmail",
+                    "source": "composio",
+                },
+                content=summary_content,
+                content_hash=content_hash,
+                unique_identifier_hash=unique_identifier_hash,
+                embedding=summary_embedding,
+                chunks=chunks,
+                updated_at=get_current_timestamp(),
+            )
+            session.add(document)
+            documents_indexed += 1
+
+            # Batch commit every 10 documents
+            current_total = total_documents_indexed + documents_indexed
+            if current_total % 10 == 0:
+                logger.info(
+                    f"Committing batch: {current_total} Gmail messages processed so far"
+                )
+                await session.commit()
+
+        except Exception as e:
+            logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
+            documents_skipped += 1
+            # Rollback on error to avoid partial state (per Composio best practices)
+            try:
+                await session.rollback()
+            except Exception as rollback_error:
+                logger.error(
+                    f"Error during rollback: {rollback_error!s}", exc_info=True
+                )
+            continue
+
+    return documents_indexed, documents_skipped
+
+
+async def index_composio_gmail(
+    session: AsyncSession,
+    connector,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    start_date: str | None,
+    end_date: str | None,
+    task_logger: TaskLoggingService,
+    log_entry,
+    update_last_indexed: bool = True,
+    max_items: int = 1000,
+) -> tuple[int, str]:
+    """Index Gmail messages via Composio with pagination and incremental processing."""
+    try:
+        composio_connector = ComposioGmailConnector(session, connector_id)
+
+        # Normalize dates - the frontend may send "undefined" or "" instead of null
+        if start_date == "undefined" or start_date == "":
+            start_date = None
+        if end_date == "undefined" or end_date == "":
+            end_date = None
+
+        # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
+        # This ensures user-selected dates are respected (matching non-Composio Gmail connector behavior)
+        if start_date is not None and end_date is not None:
+            # User provided both dates - use them directly
+            start_date_str = start_date
+            end_date_str = end_date
+        else:
+            # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+            # This ensures indexing works even when user doesn't specify dates
+            start_date_str, end_date_str = calculate_date_range(
+                connector, start_date, end_date, default_days_back=365
+            )
+
+        # Build query with date range
+        query_parts = []
+        if start_date_str:
+            query_parts.append(f"after:{start_date_str.replace('-', '/')}")
+        if end_date_str:
+            query_parts.append(f"before:{end_date_str.replace('-', '/')}")
+        query = " ".join(query_parts) if query_parts else ""
+
+        logger.info(
+            f"Gmail query for connector {connector_id}: '{query}' "
+            f"(start_date={start_date_str}, end_date={end_date_str})"
+        )
+
+        # Use smaller batch size to avoid 413 payload too large errors
+        batch_size = 50
+        page_token = None
+        total_documents_indexed = 0
+        total_documents_skipped = 0
+        total_messages_fetched = 0
+        result_size_estimate = None  # Will be set from first API response
+
+        while total_messages_fetched < max_items:
+            # Calculate how many messages to fetch in this batch
+            remaining = max_items - total_messages_fetched
+            current_batch_size = min(batch_size, remaining)
+
+            # Use result_size_estimate if available, otherwise fall back to max_items
+            estimated_total = (
+                result_size_estimate if result_size_estimate is not None else max_items
+            )
+            # Cap estimated_total at max_items to avoid showing misleading progress
+            estimated_total = min(estimated_total, max_items)
+
+            await task_logger.log_task_progress(
+                log_entry,
+                f"Fetching Gmail messages batch via Composio for connector {connector_id} "
+                f"({total_messages_fetched}/{estimated_total} fetched, {total_documents_indexed} indexed)",
+                {
+                    "stage": "fetching_messages",
+                    "batch_size": current_batch_size,
+                    "total_fetched": total_messages_fetched,
+                    "total_indexed": total_documents_indexed,
+                    "estimated_total": estimated_total,
+                },
+            )
+
+            # Fetch batch of messages
+            (
+                messages,
+                next_token,
+                result_size_estimate_batch,
+                error,
+            ) = await composio_connector.list_gmail_messages(
+                query=query,
+                max_results=current_batch_size,
+                page_token=page_token,
+            )
+
+            if error:
+                await task_logger.log_task_failure(
+                    log_entry, f"Failed to fetch Gmail messages: {error}", {}
+                )
+                return 0, f"Failed to fetch Gmail messages: {error}"
+
+            if not messages:
+                # No more messages available
+                break
+
+            # Update result_size_estimate from first response (Gmail provides this estimate)
+            if result_size_estimate is None and result_size_estimate_batch is not None:
+                result_size_estimate = result_size_estimate_batch
+                logger.info(
+                    f"Gmail API estimated {result_size_estimate} total messages for query: '{query}'"
+                )
+
+            total_messages_fetched += len(messages)
+            # Recalculate estimated_total after potentially updating result_size_estimate
+            estimated_total = (
+                result_size_estimate if result_size_estimate is not None else max_items
+            )
+            estimated_total = min(estimated_total, max_items)
+
+            logger.info(
+                f"Fetched batch of {len(messages)} Gmail messages "
+                f"(total: {total_messages_fetched}/{estimated_total})"
+            )
+
+            # Process batch incrementally
+            batch_indexed, batch_skipped = await _process_gmail_message_batch(
+                session=session,
+                messages=messages,
+                composio_connector=composio_connector,
+                connector_id=connector_id,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                total_documents_indexed=total_documents_indexed,
+            )
+
+            total_documents_indexed += batch_indexed
+            total_documents_skipped += batch_skipped
+
+            logger.info(
+                f"Processed batch: {batch_indexed} indexed, {batch_skipped} skipped "
+                f"(total: {total_documents_indexed} indexed, {total_documents_skipped} skipped)"
+            )
+
+            # Batch commits happen in _process_gmail_message_batch every 10 documents
+            # This ensures progress is saved incrementally, preventing data loss on crashes
+
+            # Check if we should continue
+            if not next_token:
+                # No more pages available
+                break
+
+            if len(messages) < current_batch_size:
+                # Last page had fewer items than requested, we're done
+                break
+
+            # Continue with next page
+            page_token = next_token
+
+        if total_messages_fetched == 0:
+            success_msg = "No Gmail messages found in the specified date range"
+            await task_logger.log_task_success(
+                log_entry, success_msg, {"messages_count": 0}
+            )
+            # CRITICAL: Update timestamp even when no messages found so Electric SQL syncs and UI shows indexed status
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
+            return 0, None  # Return None (not error) when no items found
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)
+
+        # Final commit to ensure all documents are persisted (safety net)
+        # This matches the pattern used in non-Composio Gmail indexer
+        logger.info(
+            f"Final commit: Total {total_documents_indexed} Gmail messages processed"
+        )
+        await session.commit()
+        logger.info(
+            "Successfully committed all Composio Gmail document changes to database"
+        )
+
+        await task_logger.log_task_success(
+            log_entry,
+            f"Successfully completed Gmail indexing via Composio for connector {connector_id}",
+            {
+                "documents_indexed": total_documents_indexed,
+                "documents_skipped": total_documents_skipped,
+                "messages_fetched": total_messages_fetched,
+            },
+        )
+
+        return total_documents_indexed, None
+
+    except Exception as e:
+        logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
+        return 0, f"Failed to index Gmail via Composio: {e!s}"
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@ -0,0 +1,502 @@
+"""
+Composio Google Calendar Connector Module.
+
+Provides Google Calendar specific methods for data retrieval and indexing via Composio.
+"""
+
+import logging
+from datetime import UTC, datetime
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
+from sqlalchemy.orm import selectinload
+
+from app.config import config
+from app.connectors.composio_connector import ComposioConnector
+from app.db import Document, DocumentType
+from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
+from app.services.llm_service import get_user_long_context_llm
+from app.services.task_logging_service import TaskLoggingService
+from app.tasks.connector_indexers.base import (
+    calculate_date_range,
+    check_duplicate_document_by_hash,
+)
+from app.utils.document_converters import (
+    create_document_chunks,
+    generate_content_hash,
+    generate_document_summary,
+    generate_unique_identifier_hash,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def get_current_timestamp() -> datetime:
+    """Return the present moment as a timezone-aware UTC datetime (used for updated_at)."""
+    now_utc = datetime.now(tz=UTC)
+    return now_utc
+
+
+async def check_document_by_unique_identifier(
+    session: AsyncSession, unique_identifier_hash: str
+) -> Document | None:
+    """Look up a document by its unique identifier hash.
+
+    The query eagerly loads ``Document.chunks`` via ``selectinload``.
+
+    Returns:
+        The first matching ``Document``, or ``None`` when nothing matches.
+    """
+    lookup_stmt = (
+        select(Document)
+        .options(selectinload(Document.chunks))
+        .where(Document.unique_identifier_hash == unique_identifier_hash)
+    )
+    lookup_result = await session.execute(lookup_stmt)
+    return lookup_result.scalars().first()
+
+
+async def update_connector_last_indexed(
+    session: AsyncSession,
+    connector,
+    update_last_indexed: bool = True,
+) -> None:
+    """Stamp ``connector.last_indexed_at`` with the current UTC time.
+
+    Nothing happens when ``update_last_indexed`` is falsy. This function does
+    not touch ``session`` and does not commit.
+    """
+    if not update_last_indexed:
+        return
+
+    stamped_at = datetime.now(UTC)
+    connector.last_indexed_at = stamped_at
+    logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
+
+
+class ComposioGoogleCalendarConnector(ComposioConnector):
+    """
+    Google Calendar specific Composio connector.
+
+    Provides methods for listing calendar events and formatting them from
+    Google Calendar via Composio.
+    """
+
+    async def list_calendar_events(
+        self,
+        time_min: str | None = None,
+        time_max: str | None = None,
+        max_results: int = 250,
+    ) -> tuple[list[dict[str, Any]], str | None]:
+        """
+        List Google Calendar events via Composio.
+
+        Args:
+            time_min: Start time (RFC3339 format).
+            time_max: End time (RFC3339 format).
+            max_results: Maximum number of events.
+
+        Returns:
+            Tuple of (events list, error message). When no connected account
+            ID is available the list is empty and the message says so;
+            otherwise the service's own result is returned unchanged.
+        """
+        connected_account_id = await self.get_connected_account_id()
+        if not connected_account_id:
+            return [], "No connected account ID found"
+
+        entity_id = await self.get_entity_id()
+        service = await self._get_service()
+        return await service.get_calendar_events(
+            connected_account_id=connected_account_id,
+            entity_id=entity_id,
+            time_min=time_min,
+            time_max=time_max,
+            max_results=max_results,
+        )
+
+    def format_calendar_event_to_markdown(self, event: dict[str, Any]) -> str:
+        """
+        Format a Google Calendar event to markdown.
+
+        Fields that are missing, ``None`` or empty fall back to a placeholder
+        instead of failing. The title falls back from ``summary`` to ``title``
+        and the event ID from ``id`` to ``eventId``, matching the key
+        variations the indexer in this module accepts.
+
+        Args:
+            event: Event object from Google Calendar API.
+
+        Returns:
+            Formatted markdown string. If formatting fails unexpectedly, a
+            string starting with "Error formatting event to markdown:" is
+            returned instead of raising.
+        """
+        try:
+            # Use `or` fallbacks rather than .get() defaults so that keys which
+            # are present but null/empty are handled like missing keys.
+            summary = event.get("summary") or event.get("title") or "No Title"
+            description = event.get("description") or ""
+            location = event.get("location") or ""
+
+            # Extract start and end times ("dateTime" for timed events,
+            # "date" otherwise)
+            start = event.get("start") or {}
+            end = event.get("end") or {}
+
+            start_time = start.get("dateTime") or start.get("date", "")
+            end_time = end.get("dateTime") or end.get("date", "")
+
+            # Format times for display
+            def format_time(time_str: str) -> str:
+                if not time_str:
+                    return "Unknown"
+                try:
+                    if "T" in time_str:
+                        # fromisoformat does not accept a trailing "Z" on all
+                        # supported Python versions, so normalise it first.
+                        dt = datetime.fromisoformat(time_str.replace("Z", "+00:00"))
+                        return dt.strftime("%Y-%m-%d %H:%M")
+                    return time_str
+                except (AttributeError, TypeError, ValueError):
+                    # Unparseable value: show it as received.
+                    return time_str
+
+            start_formatted = format_time(start_time)
+            end_formatted = format_time(end_time)
+
+            # Extract attendees
+            attendee_list = []
+            for attendee in event.get("attendees") or []:
+                email = attendee.get("email", "")
+                display_name = attendee.get("displayName", email)
+                response_status = attendee.get("responseStatus", "")
+                attendee_list.append(f"- {display_name} ({response_status})")
+
+            # Build markdown content as parts and join once at the end
+            parts = [
+                f"# {summary}\n\n",
+                f"**Start:** {start_formatted}\n",
+                f"**End:** {end_formatted}\n",
+            ]
+
+            if location:
+                parts.append(f"**Location:** {location}\n")
+
+            parts.append("\n")
+
+            if description:
+                parts.append(f"## Description\n\n{description}\n\n")
+
+            if attendee_list:
+                parts.append("## Attendees\n\n")
+                parts.append("\n".join(attendee_list))
+                parts.append("\n\n")
+
+            # Add event metadata
+            event_id = event.get("id") or event.get("eventId") or "Unknown"
+            parts.append("## Event Details\n\n")
+            parts.append(f"- **Event ID:** {event_id}\n")
+            parts.append(f"- **Created:** {event.get('created', 'Unknown')}\n")
+            parts.append(f"- **Updated:** {event.get('updated', 'Unknown')}\n")
+
+            return "".join(parts)
+
+        except Exception as e:
+            # Deliberate best-effort: callers expect a string, never an exception.
+            return f"Error formatting event to markdown: {e!s}"
+
+
+# ============ Indexer Functions ============
+
+
+async def _summarize_calendar_event(
+    session: AsyncSession,
+    user_id: str,
+    search_space_id: int,
+    markdown_content: str,
+    *,
+    event_id: str,
+    summary: str,
+    start_time: str,
+    end_time: str,
+    location: str,
+) -> tuple[str, Any]:
+    """Build the stored summary text and its embedding for one calendar event.
+
+    Uses the user's long-context LLM when one is returned; otherwise falls back
+    to a short plain-text summary embedded with the configured embedding model.
+    """
+    user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+
+    if user_llm:
+        document_metadata = {
+            "event_id": event_id,
+            "summary": summary,
+            "start_time": start_time,
+            "document_type": "Google Calendar Event (Composio)",
+        }
+        summary_content, summary_embedding = await generate_document_summary(
+            markdown_content, user_llm, document_metadata
+        )
+        return summary_content, summary_embedding
+
+    summary_content = f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
+    if location:
+        summary_content += f"\nLocation: {location}"
+    summary_embedding = config.embedding_model_instance.embed(summary_content)
+    return summary_content, summary_embedding
+
+
+async def index_composio_google_calendar(
+    session: AsyncSession,
+    connector,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    start_date: str | None,
+    end_date: str | None,
+    task_logger: TaskLoggingService,
+    log_entry,
+    update_last_indexed: bool = True,
+    max_items: int = 2500,
+) -> tuple[int, str | None]:
+    """Index Google Calendar events via Composio.
+
+    Fetches events for the resolved date range, then creates or updates one
+    document per event. Events are skipped when they have no ID, when their
+    content is unchanged, or when a document with the same content hash
+    already exists. Work is committed every 10 indexed documents and once more
+    at the end.
+
+    Args:
+        session: Database session used for lookups, inserts and commits.
+        connector: Connector object; passed to ``calculate_date_range`` and
+            stamped with ``last_indexed_at``.
+        connector_id: ID of the connector, stored in document metadata.
+        search_space_id: Search space the documents belong to.
+        user_id: User whose long-context LLM is used for summaries.
+        start_date: Start date string; "undefined" and "" are treated as None.
+        end_date: End date string; "undefined" and "" are treated as None.
+        task_logger: Service used to record task progress/success/failure.
+        log_entry: Log entry handed to ``task_logger``.
+        update_last_indexed: Whether to update ``connector.last_indexed_at``.
+        max_items: Maximum number of events requested from the API.
+
+    Returns:
+        ``(documents_indexed, message)``. ``message`` is ``None`` on plain
+        success, a "<n> skipped (duplicate)" warning when duplicate content was
+        skipped, or an error description (with a count of 0) on failure.
+    """
+    try:
+        composio_connector = ComposioGoogleCalendarConnector(session, connector_id)
+
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Fetching Google Calendar events via Composio for connector {connector_id}",
+            {"stage": "fetching_events"},
+        )
+
+        # Normalize date values - handle "undefined" strings from frontend
+        if start_date == "undefined" or start_date == "":
+            start_date = None
+        if end_date == "undefined" or end_date == "":
+            end_date = None
+
+        # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
+        # This ensures user-selected dates are respected (matching non-Composio Calendar connector behavior)
+        if start_date is not None and end_date is not None:
+            # User provided both dates - use them directly
+            start_date_str = start_date
+            end_date_str = end_date
+        else:
+            # Calculate date range with defaults (uses last_indexed_at or 365 days back)
+            # This ensures indexing works even when user doesn't specify dates
+            start_date_str, end_date_str = calculate_date_range(
+                connector, start_date, end_date, default_days_back=365
+            )
+
+        # Build time range for API call
+        time_min = f"{start_date_str}T00:00:00Z"
+        time_max = f"{end_date_str}T23:59:59Z"
+
+        logger.info(
+            f"Google Calendar query for connector {connector_id}: "
+            f"(start_date={start_date_str}, end_date={end_date_str})"
+        )
+
+        events, error = await composio_connector.list_calendar_events(
+            time_min=time_min,
+            time_max=time_max,
+            max_results=max_items,
+        )
+
+        if error:
+            await task_logger.log_task_failure(
+                log_entry, f"Failed to fetch Calendar events: {error}", {}
+            )
+            return 0, f"Failed to fetch Calendar events: {error}"
+
+        if not events:
+            success_msg = "No Google Calendar events found in the specified date range"
+            await task_logger.log_task_success(
+                log_entry, success_msg, {"events_count": 0}
+            )
+            # CRITICAL: Update timestamp even when no events found so Electric SQL syncs and UI shows indexed status
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
+            # Return None (not error) when no items found - this is success with 0 items
+            return 0, None
+
+        logger.info(f"Found {len(events)} Google Calendar events to index via Composio")
+
+        documents_indexed = 0
+        documents_skipped = 0
+        # Track events skipped due to duplicate content_hash
+        duplicate_content_count = 0
+
+        for event in events:
+            try:
+                # Handle both standard Google API and potential Composio variations
+                event_id = event.get("id", "") or event.get("eventId", "")
+                summary = (
+                    event.get("summary", "") or event.get("title", "") or "No Title"
+                )
+
+                if not event_id:
+                    documents_skipped += 1
+                    continue
+
+                # Format to markdown
+                markdown_content = composio_connector.format_calendar_event_to_markdown(
+                    event
+                )
+
+                # Generate unique identifier
+                document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"])
+                unique_identifier_hash = generate_unique_identifier_hash(
+                    document_type, f"calendar_{event_id}", search_space_id
+                )
+
+                content_hash = generate_content_hash(markdown_content, search_space_id)
+
+                existing_document = await check_document_by_unique_identifier(
+                    session, unique_identifier_hash
+                )
+
+                # Extract event times; `or {}` keeps a null start/end from
+                # aborting the whole event
+                start = event.get("start") or {}
+                end = event.get("end") or {}
+                start_time = start.get("dateTime") or start.get("date", "")
+                end_time = end.get("dateTime") or end.get("date", "")
+                location = event.get("location", "")
+
+                if existing_document:
+                    if existing_document.content_hash == content_hash:
+                        # Unchanged since the last run
+                        documents_skipped += 1
+                        continue
+                else:
+                    # Document doesn't exist by unique_identifier_hash
+                    # Check if a document with the same content_hash exists (from standard connector)
+                    with session.no_autoflush:
+                        duplicate_by_content = await check_duplicate_document_by_hash(
+                            session, content_hash
+                        )
+
+                    if duplicate_by_content:
+                        # A document with the same content already exists (likely from standard connector)
+                        logger.info(
+                            f"Event {summary} already indexed by another connector "
+                            f"(existing document ID: {duplicate_by_content.id}, "
+                            f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content."
+                        )
+                        duplicate_content_count += 1
+                        documents_skipped += 1
+                        continue
+
+                # From here on the event is either new or changed.
+                summary_content, summary_embedding = await _summarize_calendar_event(
+                    session,
+                    user_id,
+                    search_space_id,
+                    markdown_content,
+                    event_id=event_id,
+                    summary=summary,
+                    start_time=start_time,
+                    end_time=end_time,
+                    location=location,
+                )
+
+                chunks = await create_document_chunks(markdown_content)
+
+                # One metadata shape for both create and update, so an update
+                # no longer drops "toolkit_id" from a previously created document.
+                event_metadata = {
+                    "event_id": event_id,
+                    "summary": summary,
+                    "start_time": start_time,
+                    "end_time": end_time,
+                    "location": location,
+                    "connector_id": connector_id,
+                    "toolkit_id": "googlecalendar",
+                    "source": "composio",
+                }
+
+                if existing_document:
+                    # Update existing
+                    existing_document.title = f"Calendar: {summary}"
+                    existing_document.content = summary_content
+                    existing_document.content_hash = content_hash
+                    existing_document.embedding = summary_embedding
+                    existing_document.document_metadata = event_metadata
+                    existing_document.chunks = chunks
+                    existing_document.updated_at = get_current_timestamp()
+                else:
+                    # Create new document
+                    document = Document(
+                        search_space_id=search_space_id,
+                        title=f"Calendar: {summary}",
+                        document_type=document_type,
+                        document_metadata=event_metadata,
+                        content=summary_content,
+                        content_hash=content_hash,
+                        unique_identifier_hash=unique_identifier_hash,
+                        embedding=summary_embedding,
+                        chunks=chunks,
+                        updated_at=get_current_timestamp(),
+                    )
+                    session.add(document)
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} Google Calendar events processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                # Best-effort per event: one bad event must not abort the run.
+                logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
+                documents_skipped += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)
+
+        # Final commit to ensure all documents are persisted (safety net for
+        # anything left over since the last batch commit)
+        logger.info(
+            f"Final commit: Total {documents_indexed} Google Calendar events processed"
+        )
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Composio Google Calendar document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same event was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if duplicates were found
+        warning_message = None
+        if duplicate_content_count > 0:
+            warning_message = f"{duplicate_content_count} skipped (duplicate)"
+
+        await task_logger.log_task_success(
+            log_entry,
+            f"Successfully completed Google Calendar indexing via Composio for connector {connector_id}",
+            {
+                "documents_indexed": documents_indexed,
+                "documents_skipped": documents_skipped,
+                "duplicate_content_count": duplicate_content_count,
+            },
+        )
+
+        logger.info(
+            f"Composio Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
+            f"({duplicate_content_count} due to duplicate content from other connectors)"
+        )
+        return documents_indexed, warning_message
+
+    except Exception as e:
+        logger.error(
+            f"Failed to index Google Calendar via Composio: {e!s}", exc_info=True
+        )
+        return 0, f"Failed to index Google Calendar via Composio: {e!s}"
--- a/surfsense_backend/app/connectors/composio_google_drive_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py
--- a/surfsense_backend/app/connectors/google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/google_calendar_connector.py
@ -142,6 +142,15 @@ class GoogleCalendarConnector:
                    flag_modified(connector, "config")
                    await self._session.commit()
            except Exception as e:
+                error_str = str(e)
+                # Check if this is an invalid_grant error (token expired/revoked)
+                if (
+                    "invalid_grant" in error_str.lower()
+                    or "token has been expired or revoked" in error_str.lower()
+                ):
+                    raise Exception(
+                        "Google Calendar authentication failed. Please re-authenticate."
+                    ) from e
                raise Exception(
                    f"Failed to refresh Google OAuth credentials: {e!s}"
                ) from e
@ -165,6 +174,14 @@ class GoogleCalendarConnector:
            self.service = build("calendar", "v3", credentials=credentials)
            return self.service
        except Exception as e:
+            error_str = str(e)
+            # If the error already contains a user-friendly re-authentication message, preserve it
+            if (
+                "re-authenticate" in error_str.lower()
+                or "expired or been revoked" in error_str.lower()
+                or "authentication failed" in error_str.lower()
+            ):
+                raise Exception(error_str) from e
            raise Exception(f"Failed to create Google Calendar service: {e!s}") from e

    async def get_calendars(self) -> tuple[list[dict[str, Any]], str | None]:
@ -271,6 +288,14 @@ class GoogleCalendarConnector:
            return events, None

        except Exception as e:
+            error_str = str(e)
+            # If the error already contains a user-friendly re-authentication message, preserve it
+            if (
+                "re-authenticate" in error_str.lower()
+                or "expired or been revoked" in error_str.lower()
+                or "authentication failed" in error_str.lower()
+            ):
+                return [], error_str
            return [], f"Error fetching events: {e!s}"

    def format_event_to_markdown(self, event: dict[str, Any]) -> str:
--- a/surfsense_backend/app/connectors/google_gmail_connector.py
+++ b/surfsense_backend/app/connectors/google_gmail_connector.py
@ -141,6 +141,15 @@ class GoogleGmailConnector:
                    flag_modified(connector, "config")
                    await self._session.commit()
            except Exception as e:
+                error_str = str(e)
+                # Check if this is an invalid_grant error (token expired/revoked)
+                if (
+                    "invalid_grant" in error_str.lower()
+                    or "token has been expired or revoked" in error_str.lower()
+                ):
+                    raise Exception(
+                        "Gmail authentication failed. Please re-authenticate."
+                    ) from e
                raise Exception(
                    f"Failed to refresh Google OAuth credentials: {e!s}"
                ) from e
@ -164,6 +173,14 @@ class GoogleGmailConnector:
            self.service = build("gmail", "v1", credentials=credentials)
            return self.service
        except Exception as e:
+            error_str = str(e)
+            # If the error already contains a user-friendly re-authentication message, preserve it
+            if (
+                "re-authenticate" in error_str.lower()
+                or "expired or been revoked" in error_str.lower()
+                or "authentication failed" in error_str.lower()
+            ):
+                raise Exception(error_str) from e
            raise Exception(f"Failed to create Gmail service: {e!s}") from e

    async def get_user_profile(self) -> tuple[dict[str, Any], str | None]:
@ -225,6 +242,14 @@ class GoogleGmailConnector:
            return messages, None

        except Exception as e:
+            error_str = str(e)
+            # If the error already contains a user-friendly re-authentication message, preserve it
+            if (
+                "re-authenticate" in error_str.lower()
+                or "expired or been revoked" in error_str.lower()
+                or "authentication failed" in error_str.lower()
+            ):
+                return [], error_str
            return [], f"Error fetching messages list: {e!s}"

    async def get_message_details(
@ -271,6 +296,13 @@ class GoogleGmailConnector:
        try:
            from datetime import datetime, timedelta

+            # Normalize date values - handle "undefined" strings from frontend
+            # This prevents "time data 'undefined' does not match format" errors
+            if start_date == "undefined" or start_date == "":
+                start_date = None
+            if end_date == "undefined" or end_date == "":
+                end_date = None
+
            # Build date query
            query_parts = []

--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -55,7 +55,9 @@ class DocumentType(str, Enum):
    CIRCLEBACK = "CIRCLEBACK"
    OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR"
    NOTE = "NOTE"
-    COMPOSIO_CONNECTOR = "COMPOSIO_CONNECTOR"  # Generic Composio integration
+    COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
+    COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
+    COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"


 class SearchSourceConnectorType(str, Enum):
@ -86,9 +88,9 @@ class SearchSourceConnectorType(str, Enum):
        "OBSIDIAN_CONNECTOR"  # Self-hosted only - Local Obsidian vault indexing
    )
    MCP_CONNECTOR = "MCP_CONNECTOR"  # Model Context Protocol - User-defined API tools
-    COMPOSIO_CONNECTOR = (
-        "COMPOSIO_CONNECTOR"  # Generic Composio integration (Google, Slack, etc.)
-    )
+    COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
+    COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
+    COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"


 class LiteLLMProvider(str, Enum):
@ -142,6 +144,43 @@ class LogStatus(str, Enum):
    FAILED = "FAILED"


+class IncentiveTaskType(str, Enum):
+    """
+    Identifiers for the incentive tasks users can complete to earn free pages.
+
+    Members are string-valued (the class mixes in ``str``) and each value is
+    identical to its member name. Each task can only be completed once per user.
+
+    When adding new tasks:
+    1. Add a new enum value here
+    2. Add the task configuration to INCENTIVE_TASKS_CONFIG below
+    3. Create an Alembic migration to add the enum value to PostgreSQL
+    """
+
+    GITHUB_STAR = "GITHUB_STAR"
+    # Future tasks can be added here:
+    # GITHUB_ISSUE = "GITHUB_ISSUE"
+    # SOCIAL_SHARE = "SOCIAL_SHARE"
+    # REFER_FRIEND = "REFER_FRIEND"
+
+
+# Centralized configuration for incentive tasks, keyed by IncentiveTaskType.
+# This makes it easy to add new tasks without changing code in multiple places.
+#
+# Every entry is a dict with the following keys:
+#   "title"        - short display name of the task
+#   "description"  - longer explanatory text for the task
+#   "pages_reward" - number of pages credited when the task is completed
+#   "action_url"   - URL associated with the task (for GITHUB_STAR, the repository)
+INCENTIVE_TASKS_CONFIG = {
+    IncentiveTaskType.GITHUB_STAR: {
+        "title": "Star our GitHub repository",
+        "description": "Show your support by starring SurfSense on GitHub",
+        "pages_reward": 100,
+        "action_url": "https://github.com/MODSetter/SurfSense",
+    },
+    # Future tasks can be configured here:
+    # IncentiveTaskType.GITHUB_ISSUE: {
+    #     "title": "Create an issue",
+    #     "description": "Help improve SurfSense by reporting bugs or suggesting features",
+    #     "pages_reward": 50,
+    #     "action_url": "https://github.com/MODSetter/SurfSense/issues/new/choose",
+    # },
+}
+
+
 class Permission(str, Enum):
    """
    Granular permissions for search space resources.
@ -936,6 +975,39 @@ class Notification(BaseModel, TimestampMixin):
    search_space = relationship("SearchSpace", back_populates="notifications")


+class UserIncentiveTask(BaseModel, TimestampMixin):
+    """
+    Tracks completed incentive tasks for users.
+
+    One row is stored per (user, task type) pair; the ``uq_user_incentive_task``
+    unique constraint guarantees a user can only complete each task type once.
+    When a task is completed, the user's pages_limit is increased.
+    """
+
+    __tablename__ = "user_incentive_tasks"
+    __table_args__ = (
+        # Database-level guarantee that a task cannot be recorded twice for a user.
+        UniqueConstraint(
+            "user_id",
+            "task_type",
+            name="uq_user_incentive_task",
+        ),
+    )
+
+    # Owning user; rows are removed when the user is deleted (ON DELETE CASCADE).
+    user_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("user.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    # Which incentive task this row records.
+    task_type = Column(SQLAlchemyEnum(IncentiveTaskType), nullable=False, index=True)
+    # Number of pages credited for this completion, stored on the row itself.
+    pages_awarded = Column(Integer, nullable=False)
+    # Completion time; defaults to the current UTC time when not set explicitly.
+    completed_at = Column(
+        TIMESTAMP(timezone=True),
+        nullable=False,
+        default=lambda: datetime.now(UTC),
+    )
+
+    # Counterpart of User.incentive_tasks.
+    user = relationship("User", back_populates="incentive_tasks")
+
+
 class SearchSpaceRole(BaseModel, TimestampMixin):
    """
    Custom roles that can be defined per search space.
@ -1114,6 +1186,13 @@ if config.AUTH_TYPE == "GOOGLE":
            cascade="all, delete-orphan",
        )

+        # Incentive tasks completed by this user
+        incentive_tasks = relationship(
+            "UserIncentiveTask",
+            back_populates="user",
+            cascade="all, delete-orphan",
+        )
+
        # Page usage tracking for ETL services
        pages_limit = Column(
            Integer,
@ -1165,6 +1244,13 @@ else:
            cascade="all, delete-orphan",
        )

+        # Incentive tasks completed by this user
+        incentive_tasks = relationship(
+            "UserIncentiveTask",
+            back_populates="user",
+            cascade="all, delete-orphan",
+        )
+
        # Page usage tracking for ETL services
        pages_limit = Column(
            Integer,
--- a/surfsense_backend/app/routes/__init__.py
+++ b/surfsense_backend/app/routes/__init__.py
@ -20,6 +20,7 @@ from .google_drive_add_connector_route import (
 from .google_gmail_add_connector_route import (
    router as google_gmail_add_connector_router,
 )
+from .incentive_tasks_routes import router as incentive_tasks_router
 from .jira_add_connector_route import router as jira_add_connector_router
 from .linear_add_connector_route import router as linear_add_connector_router
 from .logs_routes import router as logs_router
@ -69,3 +70,4 @@ router.include_router(surfsense_docs_router)  # Surfsense documentation for cita
 router.include_router(notifications_router)  # Notifications with Electric SQL sync
 router.include_router(composio_router)  # Composio OAuth and toolkit management
 router.include_router(public_chat_router)  # Public chat sharing and cloning
+router.include_router(incentive_tasks_router)  # Incentive tasks for earning free pages
--- a/surfsense_backend/app/routes/composio_routes.py
+++ b/surfsense_backend/app/routes/composio_routes.py
@ -8,16 +8,18 @@ Endpoints:
 - GET /composio/toolkits - List available Composio toolkits
 - GET /auth/composio/connector/add - Initiate OAuth for a specific toolkit
 - GET /auth/composio/connector/callback - Handle OAuth callback
+- GET /connectors/{connector_id}/composio-drive/folders - List folders/files for Composio Google Drive
 """

 import logging
 from uuid import UUID

-from fastapi import APIRouter, Depends, HTTPException, Query
+from fastapi import APIRouter, Depends, HTTPException, Query, Request
 from fastapi.responses import RedirectResponse
 from pydantic import ValidationError
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select

 from app.config import config
 from app.db import (
@ -29,19 +31,31 @@ from app.db import (
 from app.services.composio_service import (
    COMPOSIO_TOOLKIT_NAMES,
    INDEXABLE_TOOLKITS,
+    TOOLKIT_TO_CONNECTOR_TYPE,
    ComposioService,
 )
 from app.users import current_active_user
 from app.utils.connector_naming import (
-    check_duplicate_connector,
-    generate_unique_connector_name,
+    count_connectors_of_type,
+    get_base_name_for_type,
 )
 from app.utils.oauth_security import OAuthStateManager

+# Note: We no longer use check_duplicate_connector for Composio connectors because
+# Composio generates a new connected_account_id each time, even for the same Google account.
+# Instead, we check for existing connectors by type/space/user and update them.
+
 logger = logging.getLogger(__name__)

 router = APIRouter()

+# Map toolkit_id to frontend connector ID
+TOOLKIT_TO_FRONTEND_CONNECTOR_ID = {
+    "googledrive": "composio-googledrive",
+    "gmail": "composio-gmail",
+    "googlecalendar": "composio-googlecalendar",
+}
+
 # Initialize security utilities
 _state_manager = None

@ -166,11 +180,8 @@ async def initiate_composio_auth(

@router.get("/auth/composio/connector/callback")
 async def composio_callback(
+    request: Request,
    state: str | None = None,
-    composio_connected_account_id: str | None = Query(
-        None, alias="connectedAccountId"
-    ),  # Composio sends camelCase
-    connected_account_id: str | None = None,  # Fallback snake_case
    error: str | None = None,
    session: AsyncSession = Depends(get_async_session),
 ):
@ -236,16 +247,17 @@ async def composio_callback(
        )

        # Initialize Composio service
-        ComposioService()
+        service = ComposioService()

-        # Use camelCase param if provided (Composio's format), fallback to snake_case
-        final_connected_account_id = (
-            composio_connected_account_id or connected_account_id
-        )
+        # Extract connected_account_id from query params (accepts both camelCase and snake_case)
+        query_params = request.query_params
+        final_connected_account_id = query_params.get(
+            "connectedAccountId"
+        ) or query_params.get("connected_account_id")

-        # DEBUG: Log all query parameters received
+        # DEBUG: Log query parameter received
        logger.info(
-            f"DEBUG: Callback received - connectedAccountId: {composio_connected_account_id}, connected_account_id: {connected_account_id}, using: {final_connected_account_id}"
+            f"DEBUG: Callback received - connectedAccountId: {query_params.get('connectedAccountId')}, connected_account_id: {query_params.get('connected_account_id')}, using: {final_connected_account_id}"
        )

        # If we still don't have a connected_account_id, warn but continue
@ -268,38 +280,89 @@ async def composio_callback(
            "is_indexable": toolkit_id in INDEXABLE_TOOLKITS,
        }

-        # Check for duplicate connector
-        # For Composio, we use toolkit_id + connected_account_id as unique identifier
-        identifier = final_connected_account_id or f"{toolkit_id}_{user_id}"
+        # Get the specific connector type for this toolkit
+        connector_type_str = TOOLKIT_TO_CONNECTOR_TYPE.get(toolkit_id)
+        if not connector_type_str:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Unknown toolkit: {toolkit_id}. Available: {list(TOOLKIT_TO_CONNECTOR_TYPE.keys())}",
+            )
+        connector_type = SearchSourceConnectorType(connector_type_str)

-        is_duplicate = await check_duplicate_connector(
-            session,
-            SearchSourceConnectorType.COMPOSIO_CONNECTOR,
-            space_id,
-            user_id,
-            identifier,
+        # Check for existing connector of the same type for this user/space
+        # When reconnecting, Composio gives a new connected_account_id, so we need to
+        # check by connector_type, user_id, and search_space_id instead of connected_account_id
+        existing_connector_result = await session.execute(
+            select(SearchSourceConnector).where(
+                SearchSourceConnector.connector_type == connector_type,
+                SearchSourceConnector.search_space_id == space_id,
+                SearchSourceConnector.user_id == user_id,
+            )
        )
-        if is_duplicate:
-            logger.warning(
-                f"Duplicate Composio connector detected for user {user_id} with toolkit {toolkit_id}"
+        existing_connector = existing_connector_result.scalars().first()
+
+        if existing_connector:
+            # Delete the old Composio connected account before updating
+            old_connected_account_id = existing_connector.config.get(
+                "composio_connected_account_id"
+            )
+            if (
+                old_connected_account_id
+                and old_connected_account_id != final_connected_account_id
+            ):
+                try:
+                    deleted = await service.delete_connected_account(
+                        old_connected_account_id
+                    )
+                    if deleted:
+                        logger.info(
+                            f"Deleted old Composio connected account {old_connected_account_id} "
+                            f"before updating connector {existing_connector.id}"
+                        )
+                    else:
+                        logger.warning(
+                            f"Failed to delete old Composio connected account {old_connected_account_id}"
+                        )
+                except Exception as delete_error:
+                    # Log but don't fail - the old account may already be deleted
+                    logger.warning(
+                        f"Error deleting old Composio connected account {old_connected_account_id}: {delete_error!s}"
+                    )
+
+            # Update existing connector with new connected_account_id
+            logger.info(
+                f"Updating existing Composio connector {existing_connector.id} with new connected_account_id {final_connected_account_id}"
+            )
+            existing_connector.config = connector_config
+            await session.commit()
+            await session.refresh(existing_connector)
+
+            # Get the frontend connector ID based on toolkit_id
+            frontend_connector_id = TOOLKIT_TO_FRONTEND_CONNECTOR_ID.get(
+                toolkit_id, "composio-connector"
            )
            return RedirectResponse(
-                url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&error=duplicate_account&connector=composio-connector"
+                url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&success=true&connector={frontend_connector_id}&connectorId={existing_connector.id}"
            )

        try:
-            # Generate a unique, user-friendly connector name
-            connector_name = await generate_unique_connector_name(
-                session,
-                SearchSourceConnectorType.COMPOSIO_CONNECTOR,
-                space_id,
-                user_id,
-                f"{toolkit_name} (Composio)",
+            # Count existing connectors of this type to determine the number
+            count = await count_connectors_of_type(
+                session, connector_type, space_id, user_id
            )

+            # Generate base name (e.g., "Gmail", "Google Drive")
+            base_name = get_base_name_for_type(connector_type)
+
+            # Format: "Gmail (Composio) 1", "Gmail (Composio) 2", etc.
+            if count == 0:
+                connector_name = f"{base_name} (Composio) 1"
+            else:
+                connector_name = f"{base_name} (Composio) {count + 1}"
+
            db_connector = SearchSourceConnector(
                name=connector_name,
-                connector_type=SearchSourceConnectorType.COMPOSIO_CONNECTOR,
+                connector_type=connector_type,
                config=connector_config,
                search_space_id=space_id,
                user_id=user_id,
@ -314,8 +377,12 @@ async def composio_callback(
                f"Successfully created Composio connector {db_connector.id} for user {user_id}, toolkit {toolkit_id}"
            )

+            # Get the frontend connector ID based on toolkit_id
+            frontend_connector_id = TOOLKIT_TO_FRONTEND_CONNECTOR_ID.get(
+                toolkit_id, "composio-connector"
+            )
            return RedirectResponse(
-                url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&success=true&connector=composio-connector&connectorId={db_connector.id}"
+                url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/new-chat?modal=connectors&tab=all&success=true&connector={frontend_connector_id}&connectorId={db_connector.id}"
            )

        except IntegrityError as e:
@ -339,3 +406,136 @@ async def composio_callback(
        raise HTTPException(
            status_code=500, detail=f"Failed to complete Composio OAuth: {e!s}"
        ) from e
+
+
+@router.get("/connectors/{connector_id}/composio-drive/folders")
+async def list_composio_drive_folders(
+    connector_id: int,
+    parent_id: str | None = None,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Return one level of the user's Google Drive (folders AND files) via Composio.
+
+    Used at index time by the manage connector page to render the file system
+    hierarchically. Only folders are selectable.
+
+    Args:
+        connector_id: ID of the Composio Google Drive connector
+        parent_id: Optional parent folder ID to list contents (None for root)
+
+    Returns:
+        JSON with list of items, folders first, each group sorted by name
+        (case-insensitive): {
+            "items": [
+                {"id": str, "name": str, "mimeType": str, "isFolder": bool, ...},
+                ...
+            ]
+        }
+
+    Raises:
+        HTTPException: 503 if Composio is disabled, 404 if the connector does not
+            exist or is not owned by the caller, 400 if the connector has no
+            Composio connected account, 500 on any listing failure.
+    """
+    if not ComposioService.is_enabled():
+        raise HTTPException(
+            status_code=503,
+            detail="Composio integration is not enabled.",
+        )
+
+    folder_mime_type = "application/vnd.google-apps.folder"
+
+    try:
+        # The ownership check is part of the query: the connector must belong to
+        # the caller and be a Composio Google Drive connector.
+        lookup = await session.execute(
+            select(SearchSourceConnector).filter(
+                SearchSourceConnector.id == connector_id,
+                SearchSourceConnector.user_id == user.id,
+                SearchSourceConnector.connector_type
+                == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+            )
+        )
+        connector = lookup.scalars().first()
+        if not connector:
+            raise HTTPException(
+                status_code=404,
+                detail="Composio Google Drive connector not found or access denied",
+            )
+
+        account_id = connector.config.get("composio_connected_account_id")
+        if not account_id:
+            raise HTTPException(
+                status_code=400,
+                detail="Composio connected account not found. Please reconnect the connector.",
+            )
+
+        # NOTE(review): the second return value is discarded, so only the first
+        # 100 entries are returned - presumably it is a pagination token; confirm
+        # against ComposioService.get_drive_files.
+        drive_entries, _next_token, error = await ComposioService().get_drive_files(
+            connected_account_id=account_id,
+            entity_id=f"surfsense_{user.id}",
+            folder_id=parent_id,
+            page_size=100,
+        )
+
+        if error:
+            logger.error(f"Failed to list Composio Drive files: {error}")
+            raise HTTPException(
+                status_code=500, detail=f"Failed to list folder contents: {error}"
+            )
+
+        # Normalize each entry, accepting both key spellings for id/name/mime type.
+        items = []
+        for entry in drive_entries:
+            entry_id = entry.get("id", "") or entry.get("fileId", "")
+            if not entry_id:
+                # Entries without an ID cannot be selected or expanded; drop them.
+                continue
+
+            entry_name = (
+                entry.get("name", "") or entry.get("fileName", "") or "Untitled"
+            )
+            entry_mime = entry.get("mimeType", "") or entry.get("mime_type", "")
+
+            items.append(
+                {
+                    "id": entry_id,
+                    "name": entry_name,
+                    "mimeType": entry_mime,
+                    "isFolder": entry_mime == folder_mime_type,
+                    "parents": entry.get("parents", []),
+                    "size": entry.get("size"),
+                    "iconLink": entry.get("iconLink"),
+                }
+            )
+
+        # Folders first, then files, each alphabetically. False sorts before True,
+        # so "not isFolder" puts folders at the front; the sort is stable.
+        items.sort(key=lambda item: (not item["isFolder"], item["name"].lower()))
+
+        folder_count = sum(1 for item in items if item["isFolder"])
+        file_count = len(items) - folder_count
+
+        location = f" in folder {parent_id}" if parent_id else " in ROOT"
+        logger.info(
+            f"Listed {len(items)} total items ({folder_count} folders, {file_count} files) for Composio connector {connector_id}"
+            + location
+        )
+
+        return {"items": items}
+
+    except HTTPException:
+        # Already a well-formed API error; let it through untouched.
+        raise
+    except Exception as e:
+        logger.error(f"Error listing Composio Drive contents: {e!s}", exc_info=True)
+        raise HTTPException(
+            status_code=500, detail=f"Failed to list Drive contents: {e!s}"
+        ) from e
--- a/surfsense_backend/app/routes/google_drive_add_connector_route.py
+++ b/surfsense_backend/app/routes/google_drive_add_connector_route.py
@ -402,7 +402,7 @@ async def list_google_drive_folders(
        file_count = len(items) - folder_count

        logger.info(
-            f"✅ Listed {len(items)} total items ({folder_count} folders, {file_count} files) for connector {connector_id}"
+            f"Listed {len(items)} total items ({folder_count} folders, {file_count} files) for connector {connector_id}"
            + (f" in folder {parent_id}" if parent_id else " in ROOT")
        )

--- a/surfsense_backend/app/routes/incentive_tasks_routes.py
+++ b/surfsense_backend/app/routes/incentive_tasks_routes.py
@ -0,0 +1,131 @@
+"""
+Incentive Tasks API routes.
+Allows users to complete tasks (like starring GitHub repo) to earn free pages.
+Each task can only be completed once per user.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException, status
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import (
+    INCENTIVE_TASKS_CONFIG,
+    IncentiveTaskType,
+    User,
+    UserIncentiveTask,
+    get_async_session,
+)
+from app.schemas.incentive_tasks import (
+    CompleteTaskResponse,
+    IncentiveTaskInfo,
+    IncentiveTasksResponse,
+    TaskAlreadyCompletedResponse,
+)
+from app.users import current_active_user
+
+router = APIRouter(prefix="/incentive-tasks", tags=["incentive-tasks"])
+
+
+@router.get("", response_model=IncentiveTasksResponse)
+async def get_incentive_tasks(
+    user: User = Depends(current_active_user),
+    session: AsyncSession = Depends(get_async_session),
+) -> IncentiveTasksResponse:
+    """
+    List every configured incentive task together with the caller's completion state.
+
+    Returns:
+        IncentiveTasksResponse with one entry per task in INCENTIVE_TASKS_CONFIG and
+        the sum of pages awarded for the configured tasks the user has completed.
+    """
+    # Index the user's completion rows by task type for O(1) lookup below.
+    rows = await session.execute(
+        select(UserIncentiveTask).where(UserIncentiveTask.user_id == user.id)
+    )
+    done_by_type = {record.task_type: record for record in rows.scalars().all()}
+
+    task_infos = []
+    pages_earned = 0
+
+    for kind, settings in INCENTIVE_TASKS_CONFIG.items():
+        record = done_by_type.get(kind)
+
+        if record is None:
+            finished_at = None
+        else:
+            finished_at = record.completed_at
+            # Count what was actually awarded, not the currently configured reward.
+            pages_earned += record.pages_awarded
+
+        task_infos.append(
+            IncentiveTaskInfo(
+                task_type=kind,
+                title=settings["title"],
+                description=settings["description"],
+                pages_reward=settings["pages_reward"],
+                action_url=settings["action_url"],
+                completed=record is not None,
+                completed_at=finished_at,
+            )
+        )
+
+    return IncentiveTasksResponse(
+        tasks=task_infos,
+        total_pages_earned=pages_earned,
+    )
+
+
+@router.post(
+    "/{task_type}/complete",
+    response_model=CompleteTaskResponse | TaskAlreadyCompletedResponse,
+)
+async def complete_task(
+    task_type: IncentiveTaskType,
+    user: User = Depends(current_active_user),
+    session: AsyncSession = Depends(get_async_session),
+) -> CompleteTaskResponse | TaskAlreadyCompletedResponse:
+    """
+    Mark an incentive task as completed and award pages to the user.
+
+    Each task can only be completed once. If the task was already completed -
+    including by a concurrent request that committed first - the existing
+    completion information is returned and no additional pages are awarded.
+
+    Args:
+        task_type: The incentive task being completed.
+        user: The authenticated user completing the task.
+        session: Database session for this request.
+
+    Returns:
+        CompleteTaskResponse when pages were awarded, otherwise
+        TaskAlreadyCompletedResponse.
+
+    Raises:
+        HTTPException: 400 if the task type has no entry in INCENTIVE_TASKS_CONFIG.
+    """
+    # Imported locally so this handler does not depend on the module's import block.
+    from sqlalchemy.exc import IntegrityError
+
+    # Validate task type exists in config
+    task_config = INCENTIVE_TASKS_CONFIG.get(task_type)
+    if not task_config:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Unknown task type: {task_type}",
+        )
+
+    # Read the primary key once, up front: a rollback expires ``user``, and
+    # touching an expired attribute on an async session would trigger implicit I/O.
+    user_id = user.id
+
+    async def _find_completion() -> UserIncentiveTask | None:
+        """Return this user's completion row for ``task_type``, if any."""
+        result = await session.execute(
+            select(UserIncentiveTask).where(
+                UserIncentiveTask.user_id == user_id,
+                UserIncentiveTask.task_type == task_type,
+            )
+        )
+        return result.scalar_one_or_none()
+
+    # Fast path: the task was already completed
+    existing = await _find_completion()
+    if existing:
+        return TaskAlreadyCompletedResponse(
+            success=False,
+            message="Task already completed",
+            completed_at=existing.completed_at,
+        )
+
+    # Create the task completion record
+    pages_reward = task_config["pages_reward"]
+    session.add(
+        UserIncentiveTask(
+            user_id=user_id,
+            task_type=task_type,
+            pages_awarded=pages_reward,
+        )
+    )
+
+    # Increment in SQL (SET pages_limit = pages_limit + :reward) rather than
+    # writing back a value read earlier in Python, so a concurrent change to
+    # pages_limit is not overwritten.
+    user.pages_limit = User.pages_limit + pages_reward
+
+    try:
+        await session.commit()
+    except IntegrityError:
+        # Another request recorded the same (user, task) between our check and
+        # this commit; the unique constraint rejected the duplicate. Roll back
+        # (which also discards the pages_limit change) and report "already done".
+        await session.rollback()
+        existing = await _find_completion()
+        if existing is None:
+            # Not the uniqueness race we expected; surface the original error.
+            raise
+        return TaskAlreadyCompletedResponse(
+            success=False,
+            message="Task already completed",
+            completed_at=existing.completed_at,
+        )
+
+    # Reload so pages_limit reflects the value computed by the database.
+    await session.refresh(user)
+
+    return CompleteTaskResponse(
+        success=True,
+        message=f"Task completed! You earned {pages_reward} pages.",
+        pages_awarded=pages_reward,
+        new_pages_limit=user.pages_limit,
+    )
--- a/surfsense_backend/app/routes/rbac_routes.py
+++ b/surfsense_backend/app/routes/rbac_routes.py
@ -59,6 +59,58 @@ router = APIRouter()

 # ============ Permissions Endpoints ============

+# Human-readable descriptions for each permission, keyed by the permission's
+# string value ("<category>:<action>", or "*" for full access).
+# list_all_permissions looks descriptions up here and falls back to a generic
+# "Permission for <value>" text for any permission that has no entry.
+PERMISSION_DESCRIPTIONS = {
+    # Documents
+    "documents:create": "Add new documents, files, and content to the search space",
+    "documents:read": "View and search documents in the search space",
+    "documents:update": "Edit existing documents and their metadata",
+    "documents:delete": "Remove documents from the search space",
+    # Chats
+    "chats:create": "Start new AI chat conversations",
+    "chats:read": "View chat history and conversations",
+    "chats:update": "Edit chat titles and settings",
+    "chats:delete": "Delete chat conversations",
+    # Comments
+    "comments:create": "Add comments and annotations to documents",
+    "comments:read": "View comments on documents",
+    "comments:delete": "Remove comments from documents",
+    # LLM Configs
+    "llm_configs:create": "Add new AI model configurations",
+    "llm_configs:read": "View AI model settings and configurations",
+    "llm_configs:update": "Modify AI model configurations",
+    "llm_configs:delete": "Remove AI model configurations",
+    # Podcasts
+    "podcasts:create": "Generate new AI podcasts from content",
+    "podcasts:read": "Listen to and view generated podcasts",
+    "podcasts:update": "Edit podcast settings and metadata",
+    "podcasts:delete": "Remove generated podcasts",
+    # Connectors
+    "connectors:create": "Set up new data source integrations",
+    "connectors:read": "View configured data sources and their status",
+    "connectors:update": "Modify data source configurations",
+    "connectors:delete": "Remove data source integrations",
+    # Logs
+    "logs:read": "View activity logs and audit trail",
+    "logs:delete": "Clear activity logs",
+    # Members
+    "members:invite": "Send invitations to new team members",
+    "members:view": "View the list of team members",
+    "members:remove": "Remove members from the search space",
+    "members:manage_roles": "Assign and change member roles",
+    # Roles
+    "roles:create": "Create new custom roles",
+    "roles:read": "View available roles and their permissions",
+    "roles:update": "Modify role permissions",
+    "roles:delete": "Remove custom roles",
+    # Settings
+    "settings:view": "View search space settings",
+    "settings:update": "Modify search space settings",
+    "settings:delete": "Delete the entire search space",
+    # Full access
+    "*": "Full access to all features and settings",
+}
+

@router.get("/permissions", response_model=PermissionsListResponse)
 async def list_all_permissions(
@ -71,12 +123,14 @@ async def list_all_permissions(
    for perm in Permission:
        # Extract category from permission value (e.g., "documents:read" -> "documents")
        category = perm.value.split(":")[0] if ":" in perm.value else "general"
+        description = PERMISSION_DESCRIPTIONS.get(perm.value, f"Permission for {perm.value}")

        permissions.append(
            PermissionInfo(
                value=perm.value,
                name=perm.name,
                category=category,
+                description=description,
            )
        )

--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@ -22,6 +22,8 @@ import logging
 from datetime import UTC, datetime, timedelta
 from typing import Any

+import pytz
+from dateutil.parser import isoparse
 from fastapi import APIRouter, Body, Depends, HTTPException, Query
 from pydantic import BaseModel, Field, ValidationError
 from sqlalchemy.exc import IntegrityError
@ -47,6 +49,7 @@ from app.schemas import (
    SearchSourceConnectorRead,
    SearchSourceConnectorUpdate,
 )
+from app.services.composio_service import ComposioService
 from app.services.notification_service import NotificationService
 from app.tasks.connector_indexers import (
    index_airtable_records,
@ -529,6 +532,38 @@ async def delete_search_source_connector(
                    f"Failed to delete periodic schedule for connector {connector_id}"
                )

+        # For Composio connectors, also delete the connected account in Composio
+        composio_connector_types = [
+            SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+            SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
+            SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
+        ]
+        if db_connector.connector_type in composio_connector_types:
+            composio_connected_account_id = db_connector.config.get(
+                "composio_connected_account_id"
+            )
+            if composio_connected_account_id and ComposioService.is_enabled():
+                try:
+                    service = ComposioService()
+                    deleted = await service.delete_connected_account(
+                        composio_connected_account_id
+                    )
+                    if deleted:
+                        logger.info(
+                            f"Successfully deleted Composio connected account {composio_connected_account_id} "
+                            f"for connector {connector_id}"
+                        )
+                    else:
+                        logger.warning(
+                            f"Failed to delete Composio connected account {composio_connected_account_id} "
+                            f"for connector {connector_id}"
+                        )
+                except Exception as composio_error:
+                    # Log but don't fail the deletion - Composio account may already be deleted
+                    logger.warning(
+                        f"Error deleting Composio connected account {composio_connected_account_id}: {composio_error!s}"
+                    )
+
        await session.delete(db_connector)
        await session.commit()
        return {"message": "Search source connector deleted successfully"}
@ -611,32 +646,59 @@ async def index_connector_content(

        # Handle different connector types
        response_message = ""
-        today_str = datetime.now().strftime("%Y-%m-%d")
+        # Use UTC for consistency with last_indexed_at storage
+        today_str = datetime.now(UTC).strftime("%Y-%m-%d")

        # Determine the actual date range to use
        if start_date is None:
            # Use last_indexed_at or default to 365 days ago
            if connector.last_indexed_at:
-                today = datetime.now().date()
-                if connector.last_indexed_at.date() == today:
-                    # If last indexed today, go back 1 day to ensure we don't miss anything
-                    indexing_from = (today - timedelta(days=1)).strftime("%Y-%m-%d")
-                else:
-                    indexing_from = connector.last_indexed_at.strftime("%Y-%m-%d")
-            else:
-                indexing_from = (datetime.now() - timedelta(days=365)).strftime(
-                    "%Y-%m-%d"
+                # Convert last_indexed_at to timezone-naive for comparison (like calculate_date_range does)
+                last_indexed_naive = (
+                    connector.last_indexed_at.replace(tzinfo=None)
+                    if connector.last_indexed_at.tzinfo
+                    else connector.last_indexed_at
                )
+                # Use UTC for "today" to match how last_indexed_at is stored
+                today_utc = datetime.now(UTC).replace(tzinfo=None).date()
+                last_indexed_date = last_indexed_naive.date()
+
+                if last_indexed_date == today_utc:
+                    # If last indexed today, go back 1 day to ensure we don't miss anything
+                    indexing_from = (today_utc - timedelta(days=1)).strftime("%Y-%m-%d")
+                else:
+                    indexing_from = last_indexed_naive.strftime("%Y-%m-%d")
+            else:
+                indexing_from = (
+                    datetime.now(UTC).replace(tzinfo=None) - timedelta(days=365)
+                ).strftime("%Y-%m-%d")
        else:
            indexing_from = start_date

        # For calendar connectors, default to today but allow future dates if explicitly provided
        if connector.connector_type in [
            SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
+            SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
            SearchSourceConnectorType.LUMA_CONNECTOR,
        ]:
            # Default to today if no end_date provided (users can manually select future dates)
            indexing_to = today_str if end_date is None else end_date
+
+            # If start_date and end_date are the same, adjust end_date to be one day later
+            # to ensure valid date range (start_date must be strictly before end_date)
+            if indexing_from == indexing_to:
+                dt = isoparse(indexing_to)
+                if dt.tzinfo is None:
+                    dt = dt.replace(tzinfo=pytz.UTC)
+                else:
+                    dt = dt.astimezone(pytz.UTC)
+                # Add one day to end_date to make it strictly after start_date
+                dt_end = dt + timedelta(days=1)
+                indexing_to = dt_end.strftime("%Y-%m-%d")
+                logger.info(
+                    f"Adjusted end_date from {end_date} to {indexing_to} "
+                    f"to ensure valid date range (start_date must be strictly before end_date)"
+                )
        else:
            # For non-calendar connectors, cap at today
            indexing_to = end_date if end_date else today_str
@ -887,11 +949,66 @@ async def index_connector_content(
            )
            response_message = "Obsidian vault indexing started in the background."

-        elif connector.connector_type == SearchSourceConnectorType.COMPOSIO_CONNECTOR:
+        elif (
+            connector.connector_type
+            == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
+        ):
            from app.tasks.celery_tasks.connector_tasks import (
                index_composio_connector_task,
            )

+            # For Composio Google Drive, if drive_items is provided, update connector config
+            # This allows the UI to pass folder/file selection like the regular Google Drive connector
+            if drive_items and drive_items.has_items():
+                # Update connector config with the selected folders/files
+                config = connector.config or {}
+                config["selected_folders"] = [
+                    {"id": f.id, "name": f.name} for f in drive_items.folders
+                ]
+                config["selected_files"] = [
+                    {"id": f.id, "name": f.name} for f in drive_items.files
+                ]
+                if drive_items.indexing_options:
+                    config["indexing_options"] = {
+                        "max_files_per_folder": drive_items.indexing_options.max_files_per_folder,
+                        "incremental_sync": drive_items.indexing_options.incremental_sync,
+                        "include_subfolders": drive_items.indexing_options.include_subfolders,
+                    }
+                connector.config = config
+                from sqlalchemy.orm.attributes import flag_modified
+
+                flag_modified(connector, "config")
+                await session.commit()
+                await session.refresh(connector)
+
+                logger.info(
+                    f"Triggering Composio Google Drive indexing for connector {connector_id} into search space {search_space_id}, "
+                    f"folders: {len(drive_items.folders)}, files: {len(drive_items.files)}"
+                )
+            else:
+                logger.info(
+                    f"Triggering Composio Google Drive indexing for connector {connector_id} into search space {search_space_id} "
+                    f"using existing config (from {indexing_from} to {indexing_to})"
+                )
+
+            index_composio_connector_task.delay(
+                connector_id, search_space_id, str(user.id), indexing_from, indexing_to
+            )
+            response_message = (
+                "Composio Google Drive indexing started in the background."
+            )
+
+        elif connector.connector_type in [
+            SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
+            SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
+        ]:
+            from app.tasks.celery_tasks.connector_tasks import (
+                index_composio_connector_task,
+            )
+
+            # For Composio Gmail and Calendar, use the same date calculation logic as normal connectors
+            # This ensures consistent behavior and uses last_indexed_at to reduce API calls
+            # (includes special case: if indexed today, go back 1 day to avoid missing data)
            logger.info(
                f"Triggering Composio connector indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
            )
@ -943,7 +1060,9 @@ async def _update_connector_timestamp_by_id(session: AsyncSession, connector_id:
        connector = result.scalars().first()

        if connector:
-            connector.last_indexed_at = datetime.now()
+            connector.last_indexed_at = datetime.now(
+                UTC
+            )  # Use UTC for timezone consistency
            await session.commit()
            logger.info(f"Updated last_indexed_at for connector {connector_id}")
    except Exception as e:
@ -1083,18 +1202,24 @@ async def _run_indexing_with_notifications(
                )

            await update_timestamp_func(session, connector_id)
+            await session.commit()  # Commit timestamp update
            logger.info(
                f"Indexing completed successfully: {documents_processed} documents processed"
            )

-            # Update notification on success
+            # Update notification on success (or partial success with errors)
            if notification:
+                # Refresh notification to ensure it's not stale after timestamp update commit
+                await session.refresh(notification)
                await NotificationService.connector_indexing.notify_indexing_completed(
                    session=session,
                    notification=notification,
                    indexed_count=documents_processed,
-                    error_message=None,
+                    error_message=error_or_warning,  # Show errors even if some documents were indexed
                )
+                await (
+                    session.commit()
+                )  # Commit to ensure Electric SQL syncs the notification update
        elif documents_processed > 0:
            # Update notification to storing stage
            if notification:
@ -1110,24 +1235,73 @@ async def _run_indexing_with_notifications(
                f"Indexing completed successfully: {documents_processed} documents processed"
            )
            if notification:
+                # Refresh notification to ensure it's not stale after indexing function commits
+                await session.refresh(notification)
                await NotificationService.connector_indexing.notify_indexing_completed(
                    session=session,
                    notification=notification,
                    indexed_count=documents_processed,
-                    error_message=None,
+                    error_message=error_or_warning,  # Show errors even if some documents were indexed
                )
+                await (
+                    session.commit()
+                )  # Commit to ensure Electric SQL syncs the notification update
        else:
            # No new documents processed - check if this is an error or just no changes
            if error_or_warning:
-                # Actual failure
-                logger.error(f"Indexing failed: {error_or_warning}")
-                if notification:
-                    await NotificationService.connector_indexing.notify_indexing_completed(
-                        session=session,
-                        notification=notification,
-                        indexed_count=0,
-                        error_message=error_or_warning,
-                    )
+                # Check if this is a duplicate warning or empty result (success cases) or an actual error
+                # Handle both normal and Composio calendar connectors
+                error_or_warning_lower = (
+                    str(error_or_warning).lower() if error_or_warning else ""
+                )
+                is_duplicate_warning = "skipped (duplicate)" in error_or_warning_lower
+                # "No X found" messages are success cases - sync worked, just found nothing in date range
+                is_empty_result = (
+                    "no " in error_or_warning_lower
+                    and "found" in error_or_warning_lower
+                )
+
+                if is_duplicate_warning or is_empty_result:
+                    # These are success cases - sync worked, just found nothing new
+                    logger.info(f"Indexing completed successfully: {error_or_warning}")
+                    # Still update timestamp so ElectricSQL syncs and clears "Syncing" UI
+                    if update_timestamp_func:
+                        await update_timestamp_func(session, connector_id)
+                        await session.commit()  # Commit timestamp update
+                    if notification:
+                        # Refresh notification to ensure it's not stale after timestamp update commit
+                        await session.refresh(notification)
+                        # For empty results, use a cleaner message
+                        notification_message = (
+                            "No new items found in date range"
+                            if is_empty_result
+                            else error_or_warning
+                        )
+                        await NotificationService.connector_indexing.notify_indexing_completed(
+                            session=session,
+                            notification=notification,
+                            indexed_count=0,
+                            error_message=notification_message,  # Pass as warning, not error
+                            is_warning=True,  # Flag to indicate this is a warning, not an error
+                        )
+                        await (
+                            session.commit()
+                        )  # Commit to ensure Electric SQL syncs the notification update
+                else:
+                    # Actual failure
+                    logger.error(f"Indexing failed: {error_or_warning}")
+                    if notification:
+                        # Refresh notification to ensure it's not stale after indexing function commits
+                        await session.refresh(notification)
+                        await NotificationService.connector_indexing.notify_indexing_completed(
+                            session=session,
+                            notification=notification,
+                            indexed_count=0,
+                            error_message=error_or_warning,
+                        )
+                        await (
+                            session.commit()
+                        )  # Commit to ensure Electric SQL syncs the notification update
            else:
                # Success - just no new documents to index (all skipped/unchanged)
                logger.info(
@ -1136,13 +1310,19 @@ async def _run_indexing_with_notifications(
                # Still update timestamp so ElectricSQL syncs and clears "Syncing" UI
                if update_timestamp_func:
                    await update_timestamp_func(session, connector_id)
+                    await session.commit()  # Commit timestamp update
                if notification:
+                    # Refresh notification to ensure it's not stale after timestamp update commit
+                    await session.refresh(notification)
                    await NotificationService.connector_indexing.notify_indexing_completed(
                        session=session,
                        notification=notification,
                        indexed_count=0,
                        error_message=None,  # No error - sync succeeded
                    )
+                    await (
+                        session.commit()
+                    )  # Commit to ensure Electric SQL syncs the notification update
    except Exception as e:
        logger.error(f"Error in indexing task: {e!s}", exc_info=True)

@ -2157,6 +2337,59 @@ async def run_obsidian_indexing(
    )


+async def run_composio_indexing_with_new_session(
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    start_date: str,
+    end_date: str,
+):
+    """
+    Run Composio indexing inside a session opened just for this call.
+
+    The session is obtained from ``async_session_maker()`` and used as an
+    async context manager, so it is scoped to this background task instead
+    of being shared with a caller.
+
+    Args:
+        connector_id: ID of the Composio connector
+        search_space_id: ID of the search space
+        user_id: ID of the user
+        start_date: Start date for indexing (forwarded unchanged)
+        end_date: End date for indexing (forwarded unchanged)
+    """
+    async with async_session_maker() as db_session:
+        await run_composio_indexing(
+            session=db_session,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+
+async def run_composio_indexing(
+    session: AsyncSession,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    start_date: str | None,
+    end_date: str | None,
+):
+    """
+    Index a Composio connector through the shared notification-aware runner.
+
+    Delegates to ``_run_indexing_with_notifications``, supplying
+    ``index_composio_connector`` as the indexing function and
+    ``_update_connector_timestamp_by_id`` as the timestamp updater, so that
+    Electric SQL can sync indexing progress to the frontend in real-time.
+
+    Args:
+        session: Database session
+        connector_id: ID of the Composio connector
+        search_space_id: ID of the search space
+        user_id: ID of the user
+        start_date: Start date for indexing (may be None)
+        end_date: End date for indexing (may be None)
+    """
+    # Imported at call time rather than at module import.
+    from app.tasks.composio_indexer import index_composio_connector
+
+    await _run_indexing_with_notifications(
+        # What to run and how to record completion.
+        indexing_function=index_composio_connector,
+        update_timestamp_func=_update_connector_timestamp_by_id,
+        # Where and for whom to run it.
+        session=session,
+        connector_id=connector_id,
+        search_space_id=search_space_id,
+        user_id=user_id,
+        # Requested date window, passed through untouched.
+        start_date=start_date,
+        end_date=end_date,
+    )
+
+
 # =============================================================================
 # MCP Connector Routes
 # =============================================================================
--- a/surfsense_backend/app/routes/search_spaces_routes.py
+++ b/surfsense_backend/app/routes/search_spaces_routes.py
@ -129,6 +129,7 @@ async def read_search_spaces(
            result = await session.execute(
                select(SearchSpace)
                .filter(SearchSpace.user_id == user.id)
+                .order_by(SearchSpace.id.asc())
                .offset(skip)
                .limit(limit)
            )
@ -138,6 +139,7 @@ async def read_search_spaces(
                select(SearchSpace)
                .join(SearchSpaceMembership)
                .filter(SearchSpaceMembership.user_id == user.id)
+                .order_by(SearchSpace.id.asc())
                .offset(skip)
                .limit(limit)
            )
--- a/surfsense_backend/app/schemas/incentive_tasks.py
+++ b/surfsense_backend/app/schemas/incentive_tasks.py
@ -0,0 +1,61 @@
+"""
+Schemas for incentive tasks API.
+"""
+
+from datetime import datetime
+
+from pydantic import BaseModel
+
+from app.db import INCENTIVE_TASKS_CONFIG, IncentiveTaskType
+
+
+class IncentiveTaskInfo(BaseModel):
+    """Information about an available incentive task.
+
+    Pydantic model pairing one ``IncentiveTaskType`` with its display fields
+    and a completion flag. ``completed_at`` is the only field with a default.
+    """
+
+    # Which task this entry describes.
+    task_type: IncentiveTaskType
+    title: str
+    description: str
+    # NOTE(review): presumably the number of pages granted on completion - confirm.
+    pages_reward: int
+    # NOTE(review): presumably the link the user follows to perform the task - confirm.
+    action_url: str
+    completed: bool
+    # Defaults to None; presumably only set once the task is completed - confirm.
+    completed_at: datetime | None = None
+
+
+class IncentiveTasksResponse(BaseModel):
+    """Response containing all available incentive tasks with completion status."""
+
+    # One entry per task, each carrying its own completion flag.
+    tasks: list[IncentiveTaskInfo]
+    # NOTE(review): presumably the sum of pages_reward over completed tasks - confirm
+    # against the route that builds this response.
+    total_pages_earned: int
+
+
+class CompleteTaskRequest(BaseModel):
+    """Request to mark a task as completed."""
+
+    # The task to mark; typed as IncentiveTaskType, so pydantic validates it
+    # against that type.
+    task_type: IncentiveTaskType
+
+
+class CompleteTaskResponse(BaseModel):
+    """Response after completing a task."""
+
+    success: bool
+    message: str
+    # NOTE(review): presumably the pages granted by this completion - confirm.
+    pages_awarded: int
+    # NOTE(review): presumably the user's page limit after the award - confirm.
+    new_pages_limit: int
+
+
+class TaskAlreadyCompletedResponse(BaseModel):
+    """Response when task was already completed."""
+
+    success: bool
+    message: str
+    # Required here (no default), unlike IncentiveTaskInfo.completed_at.
+    completed_at: datetime
+
+
+def get_task_info(task_type: IncentiveTaskType) -> dict | None:
+    """Look up the configuration entry for one incentive task type.
+
+    Returns the result of ``INCENTIVE_TASKS_CONFIG.get(task_type)`` as-is.
+    """
+    task_config = INCENTIVE_TASKS_CONFIG.get(task_type)
+    return task_config
+
+
+def get_all_task_types() -> list[IncentiveTaskType]:
+    """Return the keys of ``INCENTIVE_TASKS_CONFIG`` as a new list."""
+    configured_types = INCENTIVE_TASKS_CONFIG.keys()
+    return list(configured_types)
--- a/surfsense_backend/app/schemas/rbac_schemas.py
+++ b/surfsense_backend/app/schemas/rbac_schemas.py
@ -167,6 +167,7 @@ class PermissionInfo(BaseModel):
    value: str
    name: str
    category: str
+    description: str


 class PermissionsListResponse(BaseModel):
--- a/surfsense_backend/app/services/composio_service.py
+++ b/surfsense_backend/app/services/composio_service.py
@ -39,21 +39,73 @@ COMPOSIO_TOOLKIT_NAMES = {
 # Toolkits that support indexing (Phase 1: Google services only)
 INDEXABLE_TOOLKITS = {"googledrive", "gmail", "googlecalendar"}

+# Mapping of toolkit IDs to connector types
+# Keys are the same three toolkit IDs listed in INDEXABLE_TOOLKITS; values are
+# the names of the Composio connector type members, stored as plain strings.
+TOOLKIT_TO_CONNECTOR_TYPE = {
+    "googledrive": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+    "gmail": "COMPOSIO_GMAIL_CONNECTOR",
+    "googlecalendar": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+}
+
+# Mapping of toolkit IDs to document types
+# Currently identical, entry for entry, to TOOLKIT_TO_CONNECTOR_TYPE; kept as a
+# separate table so the two can be changed independently.
+TOOLKIT_TO_DOCUMENT_TYPE = {
+    "googledrive": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+    "gmail": "COMPOSIO_GMAIL_CONNECTOR",
+    "googlecalendar": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+}
+
+# Mapping of toolkit IDs to their indexer functions
+# Format: toolkit_id -> (module_path, function_name, supports_date_filter)
+# supports_date_filter: True if the indexer accepts start_date/end_date params
+# NOTE(review): module_path/function_name are strings, presumably resolved by a
+# dynamic import at dispatch time - confirm against the code that reads this table.
+TOOLKIT_TO_INDEXER = {
+    "googledrive": (
+        "app.connectors.composio_google_drive_connector",
+        "index_composio_google_drive",
+        False,  # Google Drive doesn't use date filtering
+    ),
+    "gmail": (
+        "app.connectors.composio_gmail_connector",
+        "index_composio_gmail",
+        True,  # Gmail uses date filtering
+    ),
+    "googlecalendar": (
+        "app.connectors.composio_google_calendar_connector",
+        "index_composio_google_calendar",
+        True,  # Calendar uses date filtering
+    ),
+}
+

 class ComposioService:
    """Service for interacting with Composio API."""

-    def __init__(self, api_key: str | None = None):
+    # Default download directory for files from Composio
+    DEFAULT_DOWNLOAD_DIR = "/tmp/composio_downloads"
+
+    def __init__(
+        self, api_key: str | None = None, file_download_dir: str | None = None
+    ):
         """
         Initialize the Composio service.

         Args:
             api_key: Composio API key. If not provided, uses config.COMPOSIO_API_KEY.
+            file_download_dir: Directory for downloaded files. Defaults to /tmp/composio_downloads.
+
+        Raises:
+            ValueError: If neither api_key nor config.COMPOSIO_API_KEY provides a key.
         """
+        import os
+
         self.api_key = api_key or config.COMPOSIO_API_KEY
         if not self.api_key:
             raise ValueError("COMPOSIO_API_KEY is required but not configured")
-        self.client = Composio(api_key=self.api_key)
+
+        # Set up download directory
+        self.file_download_dir = file_download_dir or self.DEFAULT_DOWNLOAD_DIR
+        # Side effect: the directory is created (if missing) every time the
+        # service is instantiated, before the client is built.
+        os.makedirs(self.file_download_dir, exist_ok=True)
+
+        # Initialize Composio client with download directory
+        # Per docs: file_download_dir configures where files are downloaded
+        self.client = Composio(
+            api_key=self.api_key, file_download_dir=self.file_download_dir
+        )

    @staticmethod
    def is_enabled() -> bool:
@ -252,7 +304,6 @@ class ComposioService:
                    }
                )

-            logger.info(f"DEBUG: Found {len(result)} TOTAL connections in Composio")
            return result
        except Exception as e:
            logger.error(f"Failed to list all connections: {e!s}")
@ -269,7 +320,6 @@ class ComposioService:
            List of connected account details.
        """
        try:
-            logger.info(f"DEBUG: Calling connected_accounts.list(user_id='{user_id}')")
            accounts_response = self.client.connected_accounts.list(user_id=user_id)

            # Handle paginated response (may have .items attribute) or direct list
@ -312,6 +362,30 @@ class ComposioService:
            logger.error(f"Failed to list connections for user {user_id}: {e!s}")
            return []

+    async def delete_connected_account(self, connected_account_id: str) -> bool:
+        """
+        Delete a connected account from Composio.
+
+        This permanently removes the connected account and revokes access tokens.
+
+        The client's ``connected_accounts.delete`` is invoked as a plain
+        (non-awaited) call, so it is executed in a worker thread via
+        ``asyncio.to_thread``; calling it directly from this coroutine would
+        block the event loop for the duration of the request.
+
+        Args:
+            connected_account_id: The Composio connected account ID to delete.
+
+        Returns:
+            True if deletion was successful, False otherwise. Failures are
+            logged here and are not re-raised to the caller.
+        """
+        import asyncio
+
+        try:
+            await asyncio.to_thread(
+                self.client.connected_accounts.delete, connected_account_id
+            )
+        except Exception as e:
+            # Best-effort by design: report the failure through the return value.
+            logger.error(
+                f"Failed to delete Composio connected account {connected_account_id}: {e!s}"
+            )
+            return False
+
+        logger.info(
+            f"Successfully deleted Composio connected account: {connected_account_id}"
+        )
+        return True
+
    async def execute_tool(
        self,
        connected_account_id: str,
@ -338,7 +412,6 @@ class ComposioService:
            # - connected_account_id: for authentication
            # - user_id: user identifier (SDK uses user_id, not entity_id)
            # - dangerously_skip_version_check: skip version check for manual execution
-            logger.info(f"DEBUG: Executing tool {tool_name} with params: {params}")
            result = self.client.tools.execute(
                slug=tool_name,
                connected_account_id=connected_account_id,
@ -346,8 +419,6 @@ class ComposioService:
                arguments=params or {},
                dangerously_skip_version_check=True,
            )
-            logger.info(f"DEBUG: Tool {tool_name} raw result type: {type(result)}")
-            logger.info(f"DEBUG: Tool {tool_name} raw result: {result}")
            return {"success": True, "data": result}
        except Exception as e:
            logger.error(f"Failed to execute tool {tool_name}: {e!s}")
@ -382,7 +453,15 @@ class ComposioService:
                "page_size": min(page_size, 100),
            }
            if folder_id:
-                params["folder_id"] = folder_id
+                # List contents of a specific folder (exclude shortcuts - we don't have access to them)
+                params["q"] = (
+                    f"'{folder_id}' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
+                )
+            else:
+                # List root-level items only (My Drive root), exclude shortcuts
+                params["q"] = (
+                    "'root' in parents and trashed = false and mimeType != 'application/vnd.google-apps.shortcut'"
+                )
            if page_token:
                params["page_token"] = page_token

@ -397,9 +476,6 @@ class ComposioService:
                return [], None, result.get("error", "Unknown error")

            data = result.get("data", {})
-            logger.info(
-                f"DEBUG: Drive data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}"
-            )

            # Handle nested response structure from Composio
            files = []
@ -415,7 +491,6 @@ class ComposioService:
            elif isinstance(data, list):
                files = data

-            logger.info(f"DEBUG: Extracted {len(files)} drive files")
            return files, next_token, None

        except Exception as e:
@ -428,6 +503,10 @@ class ComposioService:
        """
        Download file content from Google Drive via Composio.

+        Per Composio docs: When tools return files, they are automatically downloaded
+        to a local directory, and the local file path is provided in the response.
+        Response includes: file_path, file_name, size fields.
+
        Args:
            connected_account_id: Composio connected account ID.
            entity_id: The entity/user ID that owns the connected account.
@ -436,27 +515,264 @@ class ComposioService:
        Returns:
            Tuple of (file content bytes, error message).
        """
+        from pathlib import Path
+
        try:
            result = await self.execute_tool(
                connected_account_id=connected_account_id,
                tool_name="GOOGLEDRIVE_DOWNLOAD_FILE",
-                params={"file_id": file_id},  # snake_case
+                params={"file_id": file_id},
                entity_id=entity_id,
            )

            if not result.get("success"):
                return None, result.get("error", "Unknown error")

-            content = result.get("data")
-            if isinstance(content, str):
-                content = content.encode("utf-8")
+            data = result.get("data")
+            if not data:
+                return None, "No data returned from Composio"

-            return content, None
+            # Per Composio docs, response includes file_path where file was downloaded
+            # Response structure: {data: {...}, error: ..., successful: ...}
+            # The actual file info is nested inside data["data"]
+            file_path = None
+
+            if isinstance(data, dict):
+                # Handle nested response structure: data contains {data, error, successful}
+                # The actual file info is in data["data"]
+                inner_data = data
+                if "data" in data and isinstance(data["data"], dict):
+                    inner_data = data["data"]
+                    logger.debug(
+                        f"Found nested data structure. Inner keys: {list(inner_data.keys())}"
+                    )
+                elif "successful" in data and "data" in data:
+                    # Standard Composio response wrapper
+                    inner_data = data["data"] if data["data"] else data
+
+                # Try documented fields: file_path, downloaded_file_content, path, uri
+                file_path = (
+                    inner_data.get("file_path")
+                    or inner_data.get("downloaded_file_content")
+                    or inner_data.get("path")
+                    or inner_data.get("uri")
+                )
+
+                # Handle nested dict case where downloaded_file_content contains the path
+                if isinstance(file_path, dict):
+                    file_path = (
+                        file_path.get("file_path")
+                        or file_path.get("downloaded_file_content")
+                        or file_path.get("path")
+                        or file_path.get("uri")
+                    )
+
+                # If still no path, check if inner_data itself has the nested structure
+                if not file_path and isinstance(inner_data, dict):
+                    for key in ["downloaded_file_content", "file_path", "path", "uri"]:
+                        if key in inner_data:
+                            val = inner_data[key]
+                            if isinstance(val, str):
+                                file_path = val
+                                break
+                            elif isinstance(val, dict):
+                                # One more level of nesting
+                                file_path = (
+                                    val.get("file_path")
+                                    or val.get("downloaded_file_content")
+                                    or val.get("path")
+                                    or val.get("uri")
+                                )
+                                if file_path:
+                                    break
+
+                logger.debug(
+                    f"Composio response keys: {list(data.keys())}, inner keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}, extracted path: {file_path}"
+                )
+            elif isinstance(data, str):
+                # Direct string response (could be path or content)
+                file_path = data
+            elif isinstance(data, bytes):
+                # Direct bytes response
+                return data, None
+
+            # Read file from the path
+            if file_path and isinstance(file_path, str):
+                path_obj = Path(file_path)
+
+                # Check if it's a valid file path (absolute or in .composio directory)
+                if path_obj.is_absolute() or ".composio" in str(path_obj):
+                    try:
+                        if path_obj.exists():
+                            content = path_obj.read_bytes()
+                            logger.info(
+                                f"Successfully read {len(content)} bytes from Composio file: {file_path}"
+                            )
+                            return content, None
+                        else:
+                            logger.warning(
+                                f"File path from Composio does not exist: {file_path}"
+                            )
+                            return None, f"File not found at path: {file_path}"
+                    except Exception as e:
+                        logger.error(
+                            f"Failed to read file from Composio path {file_path}: {e!s}"
+                        )
+                        return None, f"Failed to read file: {e!s}"
+                else:
+                    # Not a file path - might be base64 encoded content
+                    try:
+                        import base64
+
+                        content = base64.b64decode(file_path)
+                        return content, None
+                    except Exception:
+                        # Not base64, return as UTF-8 bytes
+                        return file_path.encode("utf-8"), None
+
+            # If we got here, couldn't extract file path
+            if isinstance(data, dict):
+                # Log full structure for debugging
+                inner_data = data.get("data", {})
+                logger.warning(
+                    f"Could not extract file path from Composio response. "
+                    f"Top keys: {list(data.keys())}, "
+                    f"Inner data keys: {list(inner_data.keys()) if isinstance(inner_data, dict) else type(inner_data).__name__}, "
+                    f"Full inner data: {inner_data}"
+                )
+                return (
+                    None,
+                    f"No file path in Composio response. Keys: {list(data.keys())}, inner: {list(inner_data.keys()) if isinstance(inner_data, dict) else 'N/A'}",
+                )
+
+            return None, f"Unexpected data type from Composio: {type(data).__name__}"

        except Exception as e:
            logger.error(f"Failed to get Drive file content: {e!s}")
            return None, str(e)

+    async def get_drive_start_page_token(
+        self, connected_account_id: str, entity_id: str
+    ) -> tuple[str | None, str | None]:
+        """
+        Get the starting page token for Google Drive change tracking.
+
+        Runs the GOOGLEDRIVE_GET_CHANGES_START_PAGE_TOKEN tool and looks for the token
+        under camelCase or snake_case keys, in the nested payload first, then top level.
+
+        Args:
+            connected_account_id: Composio connected account ID.
+            entity_id: The entity/user ID that owns the connected account.
+
+        Returns:
+            Tuple of (start_page_token, error message); the token is None on failure.
+        """
+        try:
+            result = await self.execute_tool(
+                connected_account_id=connected_account_id,
+                tool_name="GOOGLEDRIVE_GET_CHANGES_START_PAGE_TOKEN",
+                params={},
+                entity_id=entity_id,
+            )
+
+            if not result.get("success"):
+                return None, result.get("error", "Unknown error")
+
+            data = result.get("data", {})
+            # Nested response {data: {startPageToken: ...}}; a missing/non-dict "data" falls back to top level
+            if isinstance(data, dict):
+                inner_data = data["data"] if isinstance(data.get("data"), dict) else data
+                token = (
+                    inner_data.get("startPageToken")
+                    or inner_data.get("start_page_token")
+                    or data.get("startPageToken")
+                    or data.get("start_page_token")
+                )
+                if token:
+                    logger.info(f"Got Drive start page token: {token}")
+                    return token, None
+
+            logger.warning(f"Could not extract start page token from response: {data}")
+            return None, "No start page token in response"
+
+        except Exception as e:
+            logger.error(f"Failed to get Drive start page token: {e!s}")
+            return None, str(e)
+
+    async def list_drive_changes(
+        self,
+        connected_account_id: str,
+        entity_id: str,
+        page_token: str | None = None,
+        page_size: int = 100,
+        include_removed: bool = True,
+    ) -> tuple[list[dict[str, Any]], str | None, str | None]:
+        """
+        List changes in Google Drive since the given page token.
+
+        Per Composio docs: GOOGLEDRIVE_LIST_CHANGES tracks modifications to files/folders.
+        If pageToken is not provided, it auto-fetches the current start page token.
+        Response includes nextPageToken for pagination and newStartPageToken for future syncs.
+
+        Args:
+            connected_account_id: Composio connected account ID.
+            entity_id: The entity/user ID that owns the connected account.
+            page_token: Page token from previous sync (optional - will auto-fetch if not provided).
+            page_size: Number of changes per page (capped at 100).
+            include_removed: Whether to include removed items in the response.
+
+        Returns:
+            Tuple of (changes list, token, error message); token is newStartPageToken or, if absent, nextPageToken.
+        """
+        try:
+            params = {
+                "pageSize": min(page_size, 100),
+                "includeRemoved": include_removed,
+            }
+            if page_token:
+                params["pageToken"] = page_token
+
+            result = await self.execute_tool(
+                connected_account_id=connected_account_id,
+                tool_name="GOOGLEDRIVE_LIST_CHANGES",
+                params=params,
+                entity_id=entity_id,
+            )
+
+            if not result.get("success"):
+                return [], None, result.get("error", "Unknown error")
+
+            data = result.get("data", {})
+
+            # Handle nested response structure
+            changes = []
+            new_start_token = None
+
+            if isinstance(data, dict):
+                inner_data = data["data"] if isinstance(data.get("data"), dict) else data
+                changes = inner_data.get("changes") or data.get("changes") or []
+
+                # Get the token for next sync
+                # newStartPageToken is returned when all changes have been fetched
+                # nextPageToken is for pagination within the current fetch
+                new_start_token = (
+                    inner_data.get("newStartPageToken")
+                    or inner_data.get("new_start_page_token")
+                    or inner_data.get("nextPageToken")
+                    or inner_data.get("next_page_token")
+                    or data.get("newStartPageToken")
+                    or data.get("nextPageToken")
+                )
+
+            logger.info(
+                f"Got {len(changes)} Drive changes, new token: {str(new_start_token)[:20] if new_start_token else 'None'}..."
+            )
+            return changes, new_start_token, None
+
+        except Exception as e:
+            logger.error(f"Failed to list Drive changes: {e!s}")
+            return [], None, str(e)
+
    # ===== Gmail specific methods =====

    async def get_gmail_messages(
@ -464,25 +780,30 @ class ComposioService:
        connected_account_id: str,
        entity_id: str,
        query: str = "",
-        max_results: int = 100,
-    ) -> tuple[list[dict[str, Any]], str | None]:
+        max_results: int = 50,
+        page_token: str | None = None,
+    ) -> tuple[list[dict[str, Any]], str | None, int | None, str | None]:
        """
-        List Gmail messages via Composio.
+        List Gmail messages via Composio with pagination support.

        Args:
            connected_account_id: Composio connected account ID.
            entity_id: The entity/user ID that owns the connected account.
            query: Gmail search query.
-            max_results: Maximum number of messages to return.
+            max_results: Maximum number of messages to return per page (default: 50 to avoid payload size issues).
+            page_token: Optional pagination token for next page.

        Returns:
-            Tuple of (messages list, error message).
+            Tuple of (messages list, next_page_token, result_size_estimate, error message); always 4 items, even on error.
        """
        try:
-            # Composio uses snake_case for parameters, max is 500
-            params = {"max_results": min(max_results, 500)}
+            # Use smaller batch size to avoid 413 payload too large errors
+            # Composio uses snake_case for parameters
+            params = {"max_results": min(max_results, 50)}  # Reduced from 500 to 50
            if query:
                params["query"] = query  # Composio uses 'query' not 'q'
+            if page_token:
+                params["page_token"] = page_token

            result = await self.execute_tool(
                connected_account_id=connected_account_id,
@ -492,31 +813,42 @ class ComposioService:
            )

            if not result.get("success"):
-                return [], result.get("error", "Unknown error")
+                return [], None, None, result.get("error", "Unknown error")

            data = result.get("data", {})
-            logger.info(
-                f"DEBUG: Gmail data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}"
-            )
-            logger.info(f"DEBUG: Gmail full data: {data}")

            # Try different possible response structures
            messages = []
+            next_token = None
+            result_size_estimate = None
            if isinstance(data, dict):
                messages = (
                    data.get("messages", [])
                    or data.get("data", {}).get("messages", [])
                    or data.get("emails", [])
                )
+                # Check for pagination token in various possible locations
+                next_token = (
+                    data.get("nextPageToken")
+                    or data.get("next_page_token")
+                    or data.get("data", {}).get("nextPageToken")
+                    or data.get("data", {}).get("next_page_token")
+                )
+                # Extract resultSizeEstimate if available (Gmail API provides this)
+                result_size_estimate = (
+                    data.get("resultSizeEstimate")
+                    or data.get("result_size_estimate")
+                    or data.get("data", {}).get("resultSizeEstimate")
+                    or data.get("data", {}).get("result_size_estimate")
+                )
            elif isinstance(data, list):
                messages = data

-            logger.info(f"DEBUG: Extracted {len(messages)} messages")
-            return messages, None
+            return messages, next_token, result_size_estimate, None

        except Exception as e:
            logger.error(f"Failed to list Gmail messages: {e!s}")
-            return [], str(e)
+            return [], None, None, str(e)

    async def get_gmail_message_detail(
        self, connected_account_id: str, entity_id: str, message_id: str
@ -595,10 +927,6 @@ class ComposioService:
                return [], result.get("error", "Unknown error")

            data = result.get("data", {})
-            logger.info(
-                f"DEBUG: Calendar data type: {type(data)}, keys: {data.keys() if isinstance(data, dict) else 'N/A'}"
-            )
-            logger.info(f"DEBUG: Calendar full data: {data}")

            # Try different possible response structures
            events = []
@ -611,7 +939,6 @@ class ComposioService:
            elif isinstance(data, list):
                events = data

-            logger.info(f"DEBUG: Extracted {len(events)} calendar events")
            return events, None

        except Exception as e:
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@ -2871,3 +2871,350 @@ class ConnectorService:
        }

        return result_object, obsidian_docs
+
+    # =========================================================================
+    # Composio Connector Search Methods
+    # =========================================================================
+
+    async def search_composio_google_drive(
+        self,
+        user_query: str,
+        search_space_id: int,
+        top_k: int = 20,
+        start_date: datetime | None = None,
+        end_date: datetime | None = None,
+    ) -> tuple:
+        """
+        Search indexed Composio Google Drive documents for a user query.
+
+        Delegates retrieval to ``self._combined_rrf_search`` restricted to the
+        "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" document type, then builds per-chunk sources.
+
+        Args:
+            user_query: The user's query
+            search_space_id: The search space ID to search in
+            top_k: Maximum number of results to return
+            start_date: Optional start date for filtering documents by updated_at
+            end_date: Optional end date for filtering documents by updated_at
+
+        Returns:
+            tuple: (result dict with id/name/type/sources keys, documents returned by the search)
+        """
+        composio_drive_docs = await self._combined_rrf_search(
+            query_text=user_query,
+            search_space_id=search_space_id,
+            document_type="COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+            top_k=top_k,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Early return if no results: same envelope, empty sources and empty document list
+        if not composio_drive_docs:
+            return {
+                "id": 54,  # same id as the non-empty result below
+                "name": "Google Drive (Composio)",
+                "type": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+                "sources": [],
+            }, []
+
+        def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return (  # first non-empty value wins
+                doc_info.get("title")
+                or metadata.get("title")
+                or metadata.get("file_name")
+                or "Untitled Document"
+            )
+
+        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return metadata.get("url") or metadata.get("web_view_link") or ""
+
+        def _description_fn(
+            chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> str:
+            description = self._chunk_preview(chunk.get("content", ""), limit=200)
+            info_parts = []  # optional "Label: value" suffixes appended to the preview
+            mime_type = metadata.get("mime_type")
+            modified_time = metadata.get("modified_time")
+            if mime_type:
+                info_parts.append(f"Type: {mime_type}")
+            if modified_time:
+                info_parts.append(f"Modified: {modified_time}")
+            if info_parts:  # strip(" |") removes the leading separator when the preview is empty
+                description = (description + " | " + " | ".join(info_parts)).strip(" |")
+            return description
+
+        def _extra_fields_fn(
+            _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> dict[str, Any]:
+            return {  # missing metadata keys default to ""
+                "mime_type": metadata.get("mime_type", ""),
+                "file_id": metadata.get("file_id", ""),
+                "modified_time": metadata.get("modified_time", ""),
+            }
+
+        sources_list = self._build_chunk_sources_from_documents(
+            composio_drive_docs,
+            title_fn=_title_fn,
+            url_fn=_url_fn,
+            description_fn=_description_fn,
+            extra_fields_fn=_extra_fields_fn,
+        )
+
+        # Wrap the sources in the result envelope for this connector type
+        result_object = {
+            "id": 54,
+            "name": "Google Drive (Composio)",
+            "type": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+            "sources": sources_list,
+        }
+
+        return result_object, composio_drive_docs
+
+    async def search_composio_gmail(
+        self,
+        user_query: str,
+        search_space_id: int,
+        top_k: int = 20,
+        start_date: datetime | None = None,
+        end_date: datetime | None = None,
+    ) -> tuple:
+        """
+        Search indexed Composio Gmail documents for a user query.
+
+        Delegates retrieval to ``self._combined_rrf_search`` restricted to the
+        "COMPOSIO_GMAIL_CONNECTOR" document type, then builds per-chunk sources.
+
+        Args:
+            user_query: The user's query
+            search_space_id: The search space ID to search in
+            top_k: Maximum number of results to return
+            start_date: Optional start date for filtering documents by updated_at
+            end_date: Optional end date for filtering documents by updated_at
+
+        Returns:
+            tuple: (result dict with id/name/type/sources keys, documents returned by the search)
+        """
+        composio_gmail_docs = await self._combined_rrf_search(
+            query_text=user_query,
+            search_space_id=search_space_id,
+            document_type="COMPOSIO_GMAIL_CONNECTOR",
+            top_k=top_k,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Early return if no results: same envelope, empty sources and empty document list
+        if not composio_gmail_docs:
+            return {
+                "id": 55,  # same id as the non-empty result below
+                "name": "Gmail (Composio)",
+                "type": "COMPOSIO_GMAIL_CONNECTOR",
+                "sources": [],
+            }, []
+
+        def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return (  # first non-empty value wins
+                doc_info.get("title")
+                or metadata.get("subject")
+                or metadata.get("title")
+                or "Untitled Email"
+            )
+
+        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return metadata.get("url") or ""
+
+        def _description_fn(
+            chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> str:
+            description = self._chunk_preview(chunk.get("content", ""), limit=200)
+            info_parts = []  # optional "Label: value" suffixes appended to the preview
+            sender = metadata.get("from") or metadata.get("sender")
+            date = metadata.get("date") or metadata.get("received_at")
+            if sender:
+                info_parts.append(f"From: {sender}")
+            if date:
+                info_parts.append(f"Date: {date}")
+            if info_parts:  # strip(" |") removes the leading separator when the preview is empty
+                description = (description + " | " + " | ".join(info_parts)).strip(" |")
+            return description
+
+        def _extra_fields_fn(
+            _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> dict[str, Any]:
+            # NOTE(review): unlike _description_fn, no "sender"/"received_at" fallback is used here
+            return {
+                "message_id": metadata.get("message_id", ""),
+                "thread_id": metadata.get("thread_id", ""),
+                "from": metadata.get("from", ""),
+                "to": metadata.get("to", ""),
+                "date": metadata.get("date", ""),
+            }
+
+        sources_list = self._build_chunk_sources_from_documents(
+            composio_gmail_docs,
+            title_fn=_title_fn,
+            url_fn=_url_fn,
+            description_fn=_description_fn,
+            extra_fields_fn=_extra_fields_fn,
+        )
+
+        # Wrap the sources in the result envelope for this connector type
+        result_object = {
+            "id": 55,
+            "name": "Gmail (Composio)",
+            "type": "COMPOSIO_GMAIL_CONNECTOR",
+            "sources": sources_list,
+        }
+
+        return result_object, composio_gmail_docs
+
+    async def search_composio_google_calendar(
+        self,
+        user_query: str,
+        search_space_id: int,
+        top_k: int = 20,
+        start_date: datetime | None = None,
+        end_date: datetime | None = None,
+    ) -> tuple:
+        """
+        Search indexed Composio Google Calendar documents for a user query.
+
+        Delegates retrieval to ``self._combined_rrf_search`` restricted to the
+        "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" document type, then builds per-chunk sources.
+
+        Args:
+            user_query: The user's query
+            search_space_id: The search space ID to search in
+            top_k: Maximum number of results to return
+            start_date: Optional start date for filtering documents by updated_at
+            end_date: Optional end date for filtering documents by updated_at
+
+        Returns:
+            tuple: (result dict with id/name/type/sources keys, documents returned by the search)
+        """
+        composio_calendar_docs = await self._combined_rrf_search(
+            query_text=user_query,
+            search_space_id=search_space_id,
+            document_type="COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+            top_k=top_k,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Early return if no results: same envelope, empty sources and empty document list
+        if not composio_calendar_docs:
+            return {
+                "id": 56,  # same id as the non-empty result below
+                "name": "Google Calendar (Composio)",
+                "type": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+                "sources": [],
+            }, []
+
+        def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return (  # first non-empty value wins
+                doc_info.get("title")
+                or metadata.get("summary")
+                or metadata.get("title")
+                or "Untitled Event"
+            )
+
+        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return metadata.get("url") or metadata.get("html_link") or ""
+
+        def _description_fn(
+            chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> str:
+            description = self._chunk_preview(chunk.get("content", ""), limit=200)
+            info_parts = []  # optional "Label: value" suffixes appended to the preview
+            start_time = metadata.get("start_time") or metadata.get("start")
+            end_time = metadata.get("end_time") or metadata.get("end")
+            if start_time:
+                info_parts.append(f"Start: {start_time}")
+            if end_time:
+                info_parts.append(f"End: {end_time}")
+            if info_parts:  # strip(" |") removes the leading separator when the preview is empty
+                description = (description + " | " + " | ".join(info_parts)).strip(" |")
+            return description
+
+        def _extra_fields_fn(
+            _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> dict[str, Any]:
+            # NOTE(review): unlike _description_fn, no "start"/"end" fallback is used here
+            return {
+                "event_id": metadata.get("event_id", ""),
+                "calendar_id": metadata.get("calendar_id", ""),
+                "start_time": metadata.get("start_time", ""),
+                "end_time": metadata.get("end_time", ""),
+                "location": metadata.get("location", ""),
+            }
+
+        sources_list = self._build_chunk_sources_from_documents(
+            composio_calendar_docs,
+            title_fn=_title_fn,
+            url_fn=_url_fn,
+            description_fn=_description_fn,
+            extra_fields_fn=_extra_fields_fn,
+        )
+
+        # Wrap the sources in the result envelope for this connector type
+        result_object = {
+            "id": 56,
+            "name": "Google Calendar (Composio)",
+            "type": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+            "sources": sources_list,
+        }
+
+        return result_object, composio_calendar_docs
+
+    # =========================================================================
+    # Utility Methods for Connector Discovery
+    # =========================================================================
+
+    async def get_available_connectors(
+        self,
+        search_space_id: int,
+    ) -> list[SearchSourceConnectorType]:
+        """
+        Get the distinct connector types that have a connector row in a search space.
+
+        Args:
+            search_space_id: The search space ID
+
+        Returns:
+            List of distinct connector_type values. NOTE(review): no enabled/disabled filter is applied here
+        """
+        query = (
+            select(SearchSourceConnector.connector_type)
+            .filter(
+                SearchSourceConnector.search_space_id == search_space_id,
+            )
+            .distinct()  # one entry per connector type, however many connectors share it
+        )
+
+        result = await self.session.execute(query)
+        connector_types = result.scalars().all()
+        return list(connector_types)
+
+    async def get_available_document_types(
+        self,
+        search_space_id: int,
+    ) -> list[str]:
+        """
+        Get all document types that have at least one document in the search space.
+
+        Args:
+            search_space_id: The search space ID
+
+        Returns:
+            List of document type strings (enum members are reduced to their ``.value``)
+        """
+        from sqlalchemy import distinct
+
+        from app.db import Document
+
+        query = select(distinct(Document.document_type)).filter(
+            Document.search_space_id == search_space_id,
+        )
+
+        result = await self.session.execute(query)
+        doc_types = result.scalars().all()  # may be enum members; str() of a plain Enum is "Class.NAME"
+        return [str(getattr(dt, "value", dt)) for dt in doc_types]
--- a/surfsense_backend/app/services/notification_service.py
+++ b/surfsense_backend/app/services/notification_service.py
@ -335,6 +335,7 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
        notification: Notification,
        indexed_count: int,
        error_message: str | None = None,
+        is_warning: bool = False,
    ) -> Notification:
        """
        Update notification when connector indexing completes.
@ -343,7 +344,8 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
            session: Database session
            notification: Notification to update
            indexed_count: Total number of items indexed
-            error_message: Error message if indexing failed (optional)
+            error_message: Error message if indexing failed, or warning message (optional)
+            is_warning: If True, treat error_message as a warning (success case) rather than an error

        Returns:
            Updated notification
@ -352,10 +354,26 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):
            "connector_name", "Connector"
        )

+        # If there's an error message but items were indexed, treat it as a warning (partial success)
+        # If is_warning is True, treat it as success even with 0 items (e.g., duplicates found)
+        # Otherwise, treat it as a failure
        if error_message:
-            title = f"Failed: {connector_name}"
-            message = f"Sync failed: {error_message}"
-            status = "failed"
+            if indexed_count > 0:
+                # Partial success with warnings (e.g., duplicate content from other connectors)
+                title = f"Ready: {connector_name}"
+                item_text = "item" if indexed_count == 1 else "items"
+                message = f"Now searchable! {indexed_count} {item_text} synced. Note: {error_message}"
+                status = "completed"
+            elif is_warning:
+                # Warning case (e.g., duplicates found) - treat as success
+                title = f"Ready: {connector_name}"
+                message = f"Sync completed. {error_message}"
+                status = "completed"
+            else:
+                # Complete failure
+                title = f"Failed: {connector_name}"
+                message = f"Sync failed: {error_message}"
+                status = "failed"
        else:
            title = f"Ready: {connector_name}"
            if indexed_count == 0:
@ -367,7 +385,9 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler):

        metadata_updates = {
            "indexed_count": indexed_count,
-            "sync_stage": "completed" if not error_message else "failed",
+            "sync_stage": "completed"
+            if (not error_message or is_warning or indexed_count > 0)
+            else "failed",
            "error_message": error_message,
        }

--- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
@ -810,8 +810,8 @@ def index_composio_connector_task(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    start_date: str,
-    end_date: str,
+    start_date: str | None,
+    end_date: str | None,
 ):
    """Celery task to index Composio connector content (Google Drive, Gmail, Calendar via Composio)."""
    import asyncio
@ -833,14 +833,16 @@ async def _index_composio_connector(
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    start_date: str,
-    end_date: str,
+    start_date: str | None,
+    end_date: str | None,
 ):
-    """Index Composio connector content with new session."""
-    # Import from tasks folder (not connector_indexers) to avoid circular import
-    from app.tasks.composio_indexer import index_composio_connector
+    """Index Composio connector content with new session and real-time notifications."""
+    # Import from routes to use the notification-wrapped version
+    from app.routes.search_source_connectors_routes import (
+        run_composio_indexing,
+    )

    async with get_celery_session_maker()() as session:
-        await index_composio_connector(
+        await run_composio_indexing(
            session, connector_id, search_space_id, user_id, start_date, end_date
        )
--- a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
+++ b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
@ -66,6 +66,7 @@ async def _check_and_trigger_schedules():
            from app.tasks.celery_tasks.connector_tasks import (
                index_airtable_records_task,
                index_clickup_tasks_task,
+                index_composio_connector_task,
                index_confluence_pages_task,
                index_crawled_urls_task,
                index_discord_messages_task,
@ -98,6 +99,10 @@ async def _check_and_trigger_schedules():
                SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
                SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
                SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR: index_google_drive_files_task,
+                # Composio connector types
+                SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: index_composio_connector_task,
+                SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR: index_composio_connector_task,
+                SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: index_composio_connector_task,
            }

            # Trigger indexing for each due connector
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -54,21 +54,68 @@ def format_attachments_as_context(attachments: list[ChatAttachment]) -> str:


 def format_mentioned_documents_as_context(documents: list[Document]) -> str:
-    """Format mentioned documents as context for the agent."""
+    """
+    Format mentioned documents as context for the agent.
+
+    Uses the same XML structure as knowledge_base.format_documents_for_context
+    to ensure citations work properly with chunk IDs.
+    """
    if not documents:
        return ""

    context_parts = ["<mentioned_documents>"]
    context_parts.append(
        "The user has explicitly mentioned the following documents from their knowledge base. "
-        "These documents are directly relevant to the query and should be prioritized as primary sources."
+        "These documents are directly relevant to the query and should be prioritized as primary sources. "
+        "Use [citation:CHUNK_ID] format for citations (e.g., [citation:123])."
    )
-    for i, doc in enumerate(documents, 1):
-        context_parts.append(
-            f"<document index='{i}' id='{doc.id}' title='{doc.title}' type='{doc.document_type.value}'>"
+    context_parts.append("")
+
+    for doc in documents:
+        # Build metadata JSON
+        metadata = doc.document_metadata or {}
+        metadata_json = json.dumps(metadata, ensure_ascii=False)
+
+        # Get URL from metadata
+        url = (
+            metadata.get("url")
+            or metadata.get("source")
+            or metadata.get("page_url")
+            or ""
        )
-        context_parts.append(f"<![CDATA[{doc.content}]]>")
+
+        context_parts.append("<document>")
+        context_parts.append("<document_metadata>")
+        context_parts.append(f"  <document_id>{doc.id}</document_id>")
+        context_parts.append(
+            f"  <document_type>{doc.document_type.value}</document_type>"
+        )
+        context_parts.append(f"  <title><![CDATA[{doc.title}]]></title>")
+        context_parts.append(f"  <url><![CDATA[{url}]]></url>")
+        context_parts.append(
+            f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
+        )
+        context_parts.append("</document_metadata>")
+        context_parts.append("")
+        context_parts.append("<document_content>")
+
+        # Use chunks if available (preferred for proper citations)
+        if hasattr(doc, "chunks") and doc.chunks:
+            for chunk in doc.chunks:
+                context_parts.append(
+                    f"  <chunk id='{chunk.id}'><![CDATA[{chunk.content}]]></chunk>"
+                )
+        else:
+            # Fallback to document content if chunks not loaded
+            # Use the document ID itself as the chunk ID so the fallback keeps the same <chunk> element shape
+            context_parts.append(
+                f"  <chunk id='{doc.id}'><![CDATA[{doc.content}]]></chunk>"
+            )
+
+        context_parts.append("</document_content>")
        context_parts.append("</document>")
+        context_parts.append("")
+
    context_parts.append("</mentioned_documents>")

    return "\n".join(context_parts)
@ -81,8 +128,6 @@ def format_mentioned_surfsense_docs_as_context(
    if not documents:
        return ""

-    import json
-
    context_parts = ["<mentioned_surfsense_docs>"]
    context_parts.append(
        "The user has explicitly mentioned the following SurfSense documentation pages. "
@ -263,11 +308,15 @@ async def stream_new_chat(
        # Build input with message history from frontend
        langchain_messages = []

-        # Fetch mentioned documents if any
+        # Fetch mentioned documents if any (with chunks for proper citations)
        mentioned_documents: list[Document] = []
        if mentioned_document_ids:
+            from sqlalchemy.orm import selectinload as doc_selectinload
+
            result = await session.execute(
-                select(Document).filter(
+                select(Document)
+                .options(doc_selectinload(Document.chunks))
+                .filter(
                    Document.id.in_(mentioned_document_ids),
                    Document.search_space_id == search_space_id,
                )
--- a/surfsense_backend/app/tasks/composio_indexer.py
+++ b/surfsense_backend/app/tasks/composio_indexer.py
@ -2,83 +2,76 @@
 Composio connector indexer.

 Routes indexing requests to toolkit-specific handlers (Google Drive, Gmail, Calendar).
+Uses a registry pattern for clean, extensible connector routing.

 Note: This module is intentionally placed in app/tasks/ (not in connector_indexers/)
 to avoid circular import issues with the connector_indexers package.
 """

 import logging
-from datetime import UTC, datetime
+from importlib import import_module

 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select
-from sqlalchemy.orm import selectinload

-from app.config import config
-from app.connectors.composio_connector import ComposioConnector
 from app.db import (
-    Document,
-    DocumentType,
    SearchSourceConnector,
    SearchSourceConnectorType,
 )
-from app.services.composio_service import INDEXABLE_TOOLKITS
-from app.services.llm_service import get_user_long_context_llm
+from app.services.composio_service import INDEXABLE_TOOLKITS, TOOLKIT_TO_INDEXER
 from app.services.task_logging_service import TaskLoggingService
-from app.utils.document_converters import (
-    create_document_chunks,
-    generate_content_hash,
-    generate_document_summary,
-    generate_unique_identifier_hash,
-)

 # Set up logging
 logger = logging.getLogger(__name__)


-# ============ Utility functions (copied from connector_indexers.base to avoid circular imports) ============
+# Valid Composio connector types
+COMPOSIO_CONNECTOR_TYPES = {
+    SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+    SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
+    SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
+}


-def get_current_timestamp() -> datetime:
-    """Get the current timestamp with timezone for updated_at field."""
-    return datetime.now(UTC)
-
-
-async def check_document_by_unique_identifier(
-    session: AsyncSession, unique_identifier_hash: str
-) -> Document | None:
-    """Check if a document with the given unique identifier hash already exists."""
-    existing_doc_result = await session.execute(
-        select(Document)
-        .options(selectinload(Document.chunks))
-        .where(Document.unique_identifier_hash == unique_identifier_hash)
-    )
-    return existing_doc_result.scalars().first()
+# ============ Utility functions ============


 async def get_connector_by_id(
-    session: AsyncSession, connector_id: int, connector_type: SearchSourceConnectorType
+    session: AsyncSession,
+    connector_id: int,
+    connector_type: SearchSourceConnectorType | None,
 ) -> SearchSourceConnector | None:
-    """Get a connector by ID and type from the database."""
-    result = await session.execute(
-        select(SearchSourceConnector).filter(
-            SearchSourceConnector.id == connector_id,
-            SearchSourceConnector.connector_type == connector_type,
-        )
+    """Get a connector by ID and optionally by type from the database."""
+    query = select(SearchSourceConnector).filter(
+        SearchSourceConnector.id == connector_id
    )
+    if connector_type is not None:
+        query = query.filter(SearchSourceConnector.connector_type == connector_type)
+    result = await session.execute(query)
    return result.scalars().first()


-async def update_connector_last_indexed(
-    session: AsyncSession,
-    connector: SearchSourceConnector,
-    update_last_indexed: bool = True,
-) -> None:
-    """Update the last_indexed_at timestamp for a connector."""
-    if update_last_indexed:
-        connector.last_indexed_at = datetime.now()
-        logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
+def get_indexer_function(toolkit_id: str):
+    """
+    Dynamically import and return the indexer function for a toolkit.
+
+    Args:
+        toolkit_id: The toolkit ID (e.g., "googledrive", "gmail")
+
+    Returns:
+        Tuple of (indexer_function, supports_date_filter)
+
+    Raises:
+        ValueError: If toolkit not found in registry
+    """
+    if toolkit_id not in TOOLKIT_TO_INDEXER:
+        raise ValueError(f"No indexer registered for toolkit: {toolkit_id}")
+
+    module_path, function_name, supports_date_filter = TOOLKIT_TO_INDEXER[toolkit_id]
+    module = import_module(module_path)
+    indexer_func = getattr(module, function_name)
+    return indexer_func, supports_date_filter


 # ============ Main indexer function ============
@ -98,6 +91,7 @@ async def index_composio_connector(
    Index content from a Composio connector.

    Routes to toolkit-specific indexing based on the connector's toolkit_id.
+    Uses a registry pattern for clean, extensible connector routing.

    Args:
        session: Database session
@ -129,10 +123,16 @@ async def index_composio_connector(
    )

    try:
-        # Get connector by id
-        connector = await get_connector_by_id(
-            session, connector_id, SearchSourceConnectorType.COMPOSIO_CONNECTOR
-        )
+        # Get connector by id - accept any Composio connector type
+        connector = await get_connector_by_id(session, connector_id, None)
+
+        # Validate it's a Composio connector
+        if connector and connector.connector_type not in COMPOSIO_CONNECTOR_TYPES:
+            error_msg = f"Connector {connector_id} is not a Composio connector"
+            await task_logger.log_task_failure(
+                log_entry, error_msg, {"error_type": "InvalidConnectorType"}
+            )
+            return 0, error_msg

        if not connector:
            error_msg = f"Composio connector with ID {connector_id} not found"
@ -160,53 +160,35 @@ async def index_composio_connector(
            )
            return 0, error_msg

-        # Route to toolkit-specific indexer
-        if toolkit_id == "googledrive":
-            return await _index_composio_google_drive(
-                session=session,
-                connector=connector,
-                connector_id=connector_id,
-                search_space_id=search_space_id,
-                user_id=user_id,
-                task_logger=task_logger,
-                log_entry=log_entry,
-                update_last_indexed=update_last_indexed,
-                max_items=max_items,
-            )
-        elif toolkit_id == "gmail":
-            return await _index_composio_gmail(
-                session=session,
-                connector=connector,
-                connector_id=connector_id,
-                search_space_id=search_space_id,
-                user_id=user_id,
-                start_date=start_date,
-                end_date=end_date,
-                task_logger=task_logger,
-                log_entry=log_entry,
-                update_last_indexed=update_last_indexed,
-                max_items=max_items,
-            )
-        elif toolkit_id == "googlecalendar":
-            return await _index_composio_google_calendar(
-                session=session,
-                connector=connector,
-                connector_id=connector_id,
-                search_space_id=search_space_id,
-                user_id=user_id,
-                start_date=start_date,
-                end_date=end_date,
-                task_logger=task_logger,
-                log_entry=log_entry,
-                update_last_indexed=update_last_indexed,
-                max_items=max_items,
-            )
-        else:
-            error_msg = f"No indexer implemented for toolkit: {toolkit_id}"
+        # Get indexer function from registry
+        try:
+            indexer_func, supports_date_filter = get_indexer_function(toolkit_id)
+        except ValueError as e:
            await task_logger.log_task_failure(
-                log_entry, error_msg, {"error_type": "NoIndexerImplemented"}
+                log_entry, str(e), {"error_type": "NoIndexerImplemented"}
            )
-            return 0, error_msg
+            return 0, str(e)
+
+        # Build kwargs for the indexer function
+        kwargs = {
+            "session": session,
+            "connector": connector,
+            "connector_id": connector_id,
+            "search_space_id": search_space_id,
+            "user_id": user_id,
+            "task_logger": task_logger,
+            "log_entry": log_entry,
+            "update_last_indexed": update_last_indexed,
+            "max_items": max_items,
+        }
+
+        # Add date params for toolkits that support them
+        if supports_date_filter:
+            kwargs["start_date"] = start_date
+            kwargs["end_date"] = end_date
+
+        # Call the toolkit-specific indexer
+        return await indexer_func(**kwargs)

    except SQLAlchemyError as db_error:
        await session.rollback()
@ -228,714 +210,3 @@ async def index_composio_connector(
        )
        logger.error(f"Failed to index Composio connector: {e!s}", exc_info=True)
        return 0, f"Failed to index Composio connector: {e!s}"
-
-
-async def _index_composio_google_drive(
-    session: AsyncSession,
-    connector,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    task_logger: TaskLoggingService,
-    log_entry,
-    update_last_indexed: bool = True,
-    max_items: int = 1000,
-) -> tuple[int, str]:
-    """Index Google Drive files via Composio."""
-    try:
-        composio_connector = ComposioConnector(session, connector_id)
-
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Fetching Google Drive files via Composio for connector {connector_id}",
-            {"stage": "fetching_files"},
-        )
-
-        # Fetch files
-        all_files = []
-        page_token = None
-
-        while len(all_files) < max_items:
-            files, next_token, error = await composio_connector.list_drive_files(
-                page_token=page_token,
-                page_size=min(100, max_items - len(all_files)),
-            )
-
-            if error:
-                await task_logger.log_task_failure(
-                    log_entry, f"Failed to fetch Drive files: {error}", {}
-                )
-                return 0, f"Failed to fetch Drive files: {error}"
-
-            all_files.extend(files)
-
-            if not next_token:
-                break
-            page_token = next_token
-
-        if not all_files:
-            success_msg = "No Google Drive files found"
-            await task_logger.log_task_success(
-                log_entry, success_msg, {"files_count": 0}
-            )
-            return 0, success_msg
-
-        logger.info(f"Found {len(all_files)} Google Drive files to index via Composio")
-
-        documents_indexed = 0
-        documents_skipped = 0
-
-        for file_info in all_files:
-            try:
-                # Handle both standard Google API and potential Composio variations
-                file_id = file_info.get("id", "") or file_info.get("fileId", "")
-                file_name = (
-                    file_info.get("name", "")
-                    or file_info.get("fileName", "")
-                    or "Untitled"
-                )
-                mime_type = file_info.get("mimeType", "") or file_info.get(
-                    "mime_type", ""
-                )
-
-                if not file_id:
-                    documents_skipped += 1
-                    continue
-
-                # Skip folders
-                if mime_type == "application/vnd.google-apps.folder":
-                    continue
-
-                # Generate unique identifier hash
-                unique_identifier_hash = generate_unique_identifier_hash(
-                    DocumentType.COMPOSIO_CONNECTOR, f"drive_{file_id}", search_space_id
-                )
-
-                # Check if document exists
-                existing_document = await check_document_by_unique_identifier(
-                    session, unique_identifier_hash
-                )
-
-                # Get file content
-                (
-                    content,
-                    content_error,
-                ) = await composio_connector.get_drive_file_content(file_id)
-
-                if content_error or not content:
-                    logger.warning(
-                        f"Could not get content for file {file_name}: {content_error}"
-                    )
-                    # Use metadata as content fallback
-                    markdown_content = f"# {file_name}\n\n"
-                    markdown_content += f"**File ID:** {file_id}\n"
-                    markdown_content += f"**Type:** {mime_type}\n"
-                else:
-                    try:
-                        markdown_content = content.decode("utf-8")
-                    except UnicodeDecodeError:
-                        markdown_content = f"# {file_name}\n\n[Binary file content]\n"
-
-                content_hash = generate_content_hash(markdown_content, search_space_id)
-
-                if existing_document:
-                    if existing_document.content_hash == content_hash:
-                        documents_skipped += 1
-                        continue
-
-                    # Update existing document
-                    user_llm = await get_user_long_context_llm(
-                        session, user_id, search_space_id
-                    )
-
-                    if user_llm:
-                        document_metadata = {
-                            "file_id": file_id,
-                            "file_name": file_name,
-                            "mime_type": mime_type,
-                            "document_type": "Google Drive File (Composio)",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            markdown_content, user_llm, document_metadata
-                        )
-                    else:
-                        summary_content = (
-                            f"Google Drive File: {file_name}\n\nType: {mime_type}"
-                        )
-                        summary_embedding = config.embedding_model_instance.embed(
-                            summary_content
-                        )
-
-                    chunks = await create_document_chunks(markdown_content)
-
-                    existing_document.title = f"Drive: {file_name}"
-                    existing_document.content = summary_content
-                    existing_document.content_hash = content_hash
-                    existing_document.embedding = summary_embedding
-                    existing_document.document_metadata = {
-                        "file_id": file_id,
-                        "file_name": file_name,
-                        "mime_type": mime_type,
-                        "connector_id": connector_id,
-                        "source": "composio",
-                    }
-                    existing_document.chunks = chunks
-                    existing_document.updated_at = get_current_timestamp()
-
-                    documents_indexed += 1
-                    continue
-
-                # Create new document
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "file_id": file_id,
-                        "file_name": file_name,
-                        "mime_type": mime_type,
-                        "document_type": "Google Drive File (Composio)",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        markdown_content, user_llm, document_metadata
-                    )
-                else:
-                    summary_content = (
-                        f"Google Drive File: {file_name}\n\nType: {mime_type}"
-                    )
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                chunks = await create_document_chunks(markdown_content)
-
-                document = Document(
-                    search_space_id=search_space_id,
-                    title=f"Drive: {file_name}",
-                    document_type=DocumentType.COMPOSIO_CONNECTOR,
-                    document_metadata={
-                        "file_id": file_id,
-                        "file_name": file_name,
-                        "mime_type": mime_type,
-                        "connector_id": connector_id,
-                        "toolkit_id": "googledrive",
-                        "source": "composio",
-                    },
-                    content=summary_content,
-                    content_hash=content_hash,
-                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
-                    updated_at=get_current_timestamp(),
-                )
-                session.add(document)
-                documents_indexed += 1
-
-                if documents_indexed % 10 == 0:
-                    await session.commit()
-
-            except Exception as e:
-                logger.error(f"Error processing Drive file: {e!s}", exc_info=True)
-                documents_skipped += 1
-                continue
-
-        if documents_indexed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
-
-        await session.commit()
-
-        await task_logger.log_task_success(
-            log_entry,
-            f"Successfully completed Google Drive indexing via Composio for connector {connector_id}",
-            {
-                "documents_indexed": documents_indexed,
-                "documents_skipped": documents_skipped,
-            },
-        )
-
-        return documents_indexed, None
-
-    except Exception as e:
-        logger.error(f"Failed to index Google Drive via Composio: {e!s}", exc_info=True)
-        return 0, f"Failed to index Google Drive via Composio: {e!s}"
-
-
-async def _index_composio_gmail(
-    session: AsyncSession,
-    connector,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str | None,
-    end_date: str | None,
-    task_logger: TaskLoggingService,
-    log_entry,
-    update_last_indexed: bool = True,
-    max_items: int = 1000,
-) -> tuple[int, str]:
-    """Index Gmail messages via Composio."""
-    try:
-        composio_connector = ComposioConnector(session, connector_id)
-
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Fetching Gmail messages via Composio for connector {connector_id}",
-            {"stage": "fetching_messages"},
-        )
-
-        # Build query with date range
-        query_parts = []
-        if start_date:
-            query_parts.append(f"after:{start_date.replace('-', '/')}")
-        if end_date:
-            query_parts.append(f"before:{end_date.replace('-', '/')}")
-        query = " ".join(query_parts)
-
-        messages, error = await composio_connector.list_gmail_messages(
-            query=query,
-            max_results=max_items,
-        )
-
-        if error:
-            await task_logger.log_task_failure(
-                log_entry, f"Failed to fetch Gmail messages: {error}", {}
-            )
-            return 0, f"Failed to fetch Gmail messages: {error}"
-
-        if not messages:
-            success_msg = "No Gmail messages found in the specified date range"
-            await task_logger.log_task_success(
-                log_entry, success_msg, {"messages_count": 0}
-            )
-            return 0, success_msg
-
-        logger.info(f"Found {len(messages)} Gmail messages to index via Composio")
-
-        documents_indexed = 0
-        documents_skipped = 0
-
-        for message in messages:
-            try:
-                # Composio uses 'messageId' (camelCase), not 'id'
-                message_id = message.get("messageId", "") or message.get("id", "")
-                if not message_id:
-                    documents_skipped += 1
-                    continue
-
-                # Composio's GMAIL_FETCH_EMAILS already returns full message content
-                # No need for a separate detail API call
-
-                # Extract message info from Composio response
-                # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
-                payload = message.get("payload", {})
-                headers = payload.get("headers", [])
-
-                subject = "No Subject"
-                sender = "Unknown Sender"
-                date_str = message.get("messageTimestamp", "Unknown Date")
-
-                for header in headers:
-                    name = header.get("name", "").lower()
-                    value = header.get("value", "")
-                    if name == "subject":
-                        subject = value
-                    elif name == "from":
-                        sender = value
-                    elif name == "date":
-                        date_str = value
-
-                # Format to markdown using the full message data
-                markdown_content = composio_connector.format_gmail_message_to_markdown(
-                    message
-                )
-
-                # Generate unique identifier
-                unique_identifier_hash = generate_unique_identifier_hash(
-                    DocumentType.COMPOSIO_CONNECTOR,
-                    f"gmail_{message_id}",
-                    search_space_id,
-                )
-
-                content_hash = generate_content_hash(markdown_content, search_space_id)
-
-                existing_document = await check_document_by_unique_identifier(
-                    session, unique_identifier_hash
-                )
-
-                # Get label IDs from Composio response
-                label_ids = message.get("labelIds", [])
-
-                if existing_document:
-                    if existing_document.content_hash == content_hash:
-                        documents_skipped += 1
-                        continue
-
-                    # Update existing
-                    user_llm = await get_user_long_context_llm(
-                        session, user_id, search_space_id
-                    )
-
-                    if user_llm:
-                        document_metadata = {
-                            "message_id": message_id,
-                            "subject": subject,
-                            "sender": sender,
-                            "document_type": "Gmail Message (Composio)",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            markdown_content, user_llm, document_metadata
-                        )
-                    else:
-                        summary_content = (
-                            f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
-                        )
-                        summary_embedding = config.embedding_model_instance.embed(
-                            summary_content
-                        )
-
-                    chunks = await create_document_chunks(markdown_content)
-
-                    existing_document.title = f"Gmail: {subject}"
-                    existing_document.content = summary_content
-                    existing_document.content_hash = content_hash
-                    existing_document.embedding = summary_embedding
-                    existing_document.document_metadata = {
-                        "message_id": message_id,
-                        "subject": subject,
-                        "sender": sender,
-                        "date": date_str,
-                        "labels": label_ids,
-                        "connector_id": connector_id,
-                        "source": "composio",
-                    }
-                    existing_document.chunks = chunks
-                    existing_document.updated_at = get_current_timestamp()
-
-                    documents_indexed += 1
-                    continue
-
-                # Create new document
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "message_id": message_id,
-                        "subject": subject,
-                        "sender": sender,
-                        "document_type": "Gmail Message (Composio)",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        markdown_content, user_llm, document_metadata
-                    )
-                else:
-                    summary_content = (
-                        f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
-                    )
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                chunks = await create_document_chunks(markdown_content)
-
-                document = Document(
-                    search_space_id=search_space_id,
-                    title=f"Gmail: {subject}",
-                    document_type=DocumentType.COMPOSIO_CONNECTOR,
-                    document_metadata={
-                        "message_id": message_id,
-                        "subject": subject,
-                        "sender": sender,
-                        "date": date_str,
-                        "labels": label_ids,
-                        "connector_id": connector_id,
-                        "toolkit_id": "gmail",
-                        "source": "composio",
-                    },
-                    content=summary_content,
-                    content_hash=content_hash,
-                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
-                    updated_at=get_current_timestamp(),
-                )
-                session.add(document)
-                documents_indexed += 1
-
-                if documents_indexed % 10 == 0:
-                    await session.commit()
-
-            except Exception as e:
-                logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
-                documents_skipped += 1
-                continue
-
-        if documents_indexed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
-
-        await session.commit()
-
-        await task_logger.log_task_success(
-            log_entry,
-            f"Successfully completed Gmail indexing via Composio for connector {connector_id}",
-            {
-                "documents_indexed": documents_indexed,
-                "documents_skipped": documents_skipped,
-            },
-        )
-
-        return documents_indexed, None
-
-    except Exception as e:
-        logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
-        return 0, f"Failed to index Gmail via Composio: {e!s}"
-
-
-async def _index_composio_google_calendar(
-    session: AsyncSession,
-    connector,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str | None,
-    end_date: str | None,
-    task_logger: TaskLoggingService,
-    log_entry,
-    update_last_indexed: bool = True,
-    max_items: int = 2500,
-) -> tuple[int, str]:
-    """Index Google Calendar events via Composio."""
-    from datetime import datetime, timedelta
-
-    try:
-        composio_connector = ComposioConnector(session, connector_id)
-
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Fetching Google Calendar events via Composio for connector {connector_id}",
-            {"stage": "fetching_events"},
-        )
-
-        # Build time range
-        if start_date:
-            time_min = f"{start_date}T00:00:00Z"
-        else:
-            # Default to 365 days ago
-            default_start = datetime.now() - timedelta(days=365)
-            time_min = default_start.strftime("%Y-%m-%dT00:00:00Z")
-
-        if end_date:
-            time_max = f"{end_date}T23:59:59Z"
-        else:
-            time_max = datetime.now().strftime("%Y-%m-%dT23:59:59Z")
-
-        events, error = await composio_connector.list_calendar_events(
-            time_min=time_min,
-            time_max=time_max,
-            max_results=max_items,
-        )
-
-        if error:
-            await task_logger.log_task_failure(
-                log_entry, f"Failed to fetch Calendar events: {error}", {}
-            )
-            return 0, f"Failed to fetch Calendar events: {error}"
-
-        if not events:
-            success_msg = "No Google Calendar events found in the specified date range"
-            await task_logger.log_task_success(
-                log_entry, success_msg, {"events_count": 0}
-            )
-            return 0, success_msg
-
-        logger.info(f"Found {len(events)} Google Calendar events to index via Composio")
-
-        documents_indexed = 0
-        documents_skipped = 0
-
-        for event in events:
-            try:
-                # Handle both standard Google API and potential Composio variations
-                event_id = event.get("id", "") or event.get("eventId", "")
-                summary = (
-                    event.get("summary", "") or event.get("title", "") or "No Title"
-                )
-
-                if not event_id:
-                    documents_skipped += 1
-                    continue
-
-                # Format to markdown
-                markdown_content = composio_connector.format_calendar_event_to_markdown(
-                    event
-                )
-
-                # Generate unique identifier
-                unique_identifier_hash = generate_unique_identifier_hash(
-                    DocumentType.COMPOSIO_CONNECTOR,
-                    f"calendar_{event_id}",
-                    search_space_id,
-                )
-
-                content_hash = generate_content_hash(markdown_content, search_space_id)
-
-                existing_document = await check_document_by_unique_identifier(
-                    session, unique_identifier_hash
-                )
-
-                # Extract event times
-                start = event.get("start", {})
-                end = event.get("end", {})
-                start_time = start.get("dateTime") or start.get("date", "")
-                end_time = end.get("dateTime") or end.get("date", "")
-                location = event.get("location", "")
-
-                if existing_document:
-                    if existing_document.content_hash == content_hash:
-                        documents_skipped += 1
-                        continue
-
-                    # Update existing
-                    user_llm = await get_user_long_context_llm(
-                        session, user_id, search_space_id
-                    )
-
-                    if user_llm:
-                        document_metadata = {
-                            "event_id": event_id,
-                            "summary": summary,
-                            "start_time": start_time,
-                            "document_type": "Google Calendar Event (Composio)",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            markdown_content, user_llm, document_metadata
-                        )
-                    else:
-                        summary_content = f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
-                        if location:
-                            summary_content += f"\nLocation: {location}"
-                        summary_embedding = config.embedding_model_instance.embed(
-                            summary_content
-                        )
-
-                    chunks = await create_document_chunks(markdown_content)
-
-                    existing_document.title = f"Calendar: {summary}"
-                    existing_document.content = summary_content
-                    existing_document.content_hash = content_hash
-                    existing_document.embedding = summary_embedding
-                    existing_document.document_metadata = {
-                        "event_id": event_id,
-                        "summary": summary,
-                        "start_time": start_time,
-                        "end_time": end_time,
-                        "location": location,
-                        "connector_id": connector_id,
-                        "source": "composio",
-                    }
-                    existing_document.chunks = chunks
-                    existing_document.updated_at = get_current_timestamp()
-
-                    documents_indexed += 1
-                    continue
-
-                # Create new document
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "event_id": event_id,
-                        "summary": summary,
-                        "start_time": start_time,
-                        "document_type": "Google Calendar Event (Composio)",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        markdown_content, user_llm, document_metadata
-                    )
-                else:
-                    summary_content = (
-                        f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
-                    )
-                    if location:
-                        summary_content += f"\nLocation: {location}"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                chunks = await create_document_chunks(markdown_content)
-
-                document = Document(
-                    search_space_id=search_space_id,
-                    title=f"Calendar: {summary}",
-                    document_type=DocumentType.COMPOSIO_CONNECTOR,
-                    document_metadata={
-                        "event_id": event_id,
-                        "summary": summary,
-                        "start_time": start_time,
-                        "end_time": end_time,
-                        "location": location,
-                        "connector_id": connector_id,
-                        "toolkit_id": "googlecalendar",
-                        "source": "composio",
-                    },
-                    content=summary_content,
-                    content_hash=content_hash,
-                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
-                    updated_at=get_current_timestamp(),
-                )
-                session.add(document)
-                documents_indexed += 1
-
-                if documents_indexed % 10 == 0:
-                    await session.commit()
-
-            except Exception as e:
-                logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
-                documents_skipped += 1
-                continue
-
-        if documents_indexed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
-
-        await session.commit()
-
-        await task_logger.log_task_success(
-            log_entry,
-            f"Successfully completed Google Calendar indexing via Composio for connector {connector_id}",
-            {
-                "documents_indexed": documents_indexed,
-                "documents_skipped": documents_skipped,
-            },
-        )
-
-        return documents_indexed, None
-
-    except Exception as e:
-        logger.error(
-            f"Failed to index Google Calendar via Composio: {e!s}", exc_info=True
-        )
-        return 0, f"Failed to index Google Calendar via Composio: {e!s}"
--- a/surfsense_backend/app/tasks/connector_indexers/base.py
+++ b/surfsense_backend/app/tasks/connector_indexers/base.py
@ -112,6 +112,13 @@ def calculate_date_range(
    Returns:
        Tuple of (start_date_str, end_date_str)
    """
+    # Normalize "undefined" and empty strings (as sent by the frontend) to None.
+    # This prevents parsing errors and ensures consistent behavior across all indexers
+    if start_date == "undefined" or start_date == "":
+        start_date = None
+    if end_date == "undefined" or end_date == "":
+        end_date = None
+
    if start_date is not None and end_date is not None:
        return start_date, end_date

--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@ -136,10 +136,9 @@ async def index_bookstack_pages(
            )

            if error:
-                logger.error(f"Failed to get BookStack pages: {error}")
-
                # Don't treat "No pages found" as an error that should stop indexing
                if "No pages found" in error:
+                    logger.info(f"No BookStack pages found: {error}")
                    logger.info(
                        "No pages found is not a critical error, continuing with update"
                    )
@ -159,6 +158,7 @@ async def index_bookstack_pages(
                    )
                    return 0, None
                else:
+                    logger.error(f"Failed to get BookStack pages: {error}")
                    await task_logger.log_task_failure(
                        log_entry,
                        f"Failed to get BookStack pages: {error}",
--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@ -120,10 +120,9 @@ async def index_confluence_pages(
            )

            if error:
-                logger.error(f"Failed to get Confluence pages: {error}")
-
                # Don't treat "No pages found" as an error that should stop indexing
                if "No pages found" in error:
+                    logger.info(f"No Confluence pages found: {error}")
                    logger.info(
                        "No pages found is not a critical error, continuing with update"
                    )
@ -147,6 +146,7 @@ async def index_confluence_pages(
                            await confluence_client.close()
                    return 0, None
                else:
+                    logger.error(f"Failed to get Confluence pages: {error}")
                    await task_logger.log_task_failure(
                        log_entry,
                        f"Failed to get Confluence pages: {error}",
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@ -4,6 +4,8 @@ Google Calendar connector indexer.

 from datetime import datetime, timedelta

+import pytz
+from dateutil.parser import isoparse
 from google.oauth2.credentials import Credentials
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
@ -21,6 +23,7 @@ from app.utils.document_converters import (

 from .base import (
    check_document_by_unique_identifier,
+    check_duplicate_document_by_hash,
    get_connector_by_id,
    get_current_timestamp,
    logger,
@ -206,6 +209,23 @@ async def index_google_calendar_events(
            start_date_str = start_date
            end_date_str = end_date

+            # If start_date and end_date are the same, adjust end_date to be one day later
+            # to ensure valid date range (start_date must be strictly before end_date)
+            if start_date_str == end_date_str:
+                # Parse the date and add one day to ensure valid range
+                dt = isoparse(end_date_str)
+                if dt.tzinfo is None:
+                    dt = dt.replace(tzinfo=pytz.UTC)
+                else:
+                    dt = dt.astimezone(pytz.UTC)
+                # Add one day to end_date to make it strictly after start_date
+                dt_end = dt + timedelta(days=1)
+                end_date_str = dt_end.strftime("%Y-%m-%d")
+                logger.info(
+                    f"Adjusted end_date from {end_date} to {end_date_str} "
+                    f"to ensure valid date range (start_date must be strictly before end_date)"
+                )
+
        await task_logger.log_task_progress(
            log_entry,
            f"Fetching Google Calendar events from {start_date_str} to {end_date_str}",
@ -223,10 +243,9 @@ async def index_google_calendar_events(
            )

            if error:
-                logger.error(f"Failed to get Google Calendar events: {error}")
-
                # Don't treat "No events found" as an error that should stop indexing
                if "No events found" in error:
+                    logger.info(f"No Google Calendar events found: {error}")
                    logger.info(
                        "No events found is not a critical error, continuing with update"
                    )
@ -246,13 +265,25 @@ async def index_google_calendar_events(
                    )
                    return 0, None
                else:
+                    logger.error(f"Failed to get Google Calendar events: {error}")
+                    # Check if this is an authentication error that requires re-authentication
+                    error_message = error
+                    error_type = "APIError"
+                    if (
+                        "re-authenticate" in error.lower()
+                        or "expired or been revoked" in error.lower()
+                        or "authentication failed" in error.lower()
+                    ):
+                        error_message = "Google Calendar authentication failed. Please re-authenticate."
+                        error_type = "AuthenticationError"
+
                    await task_logger.log_task_failure(
                        log_entry,
-                        f"Failed to get Google Calendar events: {error}",
-                        "API Error",
-                        {"error_type": "APIError"},
+                        error_message,
+                        error,
+                        {"error_type": error_type},
                    )
-                    return 0, f"Failed to get Google Calendar events: {error}"
+                    return 0, error_message

            logger.info(f"Retrieved {len(events)} events from Google Calendar API")

@ -263,6 +294,9 @@ async def index_google_calendar_events(
        documents_indexed = 0
        documents_skipped = 0
        skipped_events = []
+        duplicate_content_count = (
+            0  # Track events skipped due to duplicate content_hash
+        )

        for event in events:
            try:
@ -383,6 +417,27 @@ async def index_google_calendar_events(
                        )
                        continue

+                # Document doesn't exist by unique_identifier_hash
+                # Check if a document with the same content_hash exists (from another connector)
+                with session.no_autoflush:
+                    duplicate_by_content = await check_duplicate_document_by_hash(
+                        session, content_hash
+                    )
+
+                if duplicate_by_content:
+                    # A document with the same content already exists (likely from Composio connector)
+                    logger.info(
+                        f"Event {event_summary} already indexed by another connector "
+                        f"(existing document ID: {duplicate_by_content.id}, "
+                        f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content."
+                    )
+                    duplicate_content_count += 1
+                    documents_skipped += 1
+                    skipped_events.append(
+                        f"{event_summary} (already indexed by another connector)"
+                    )
+                    continue
+
                # Document doesn't exist - create new one
                # Generate summary with metadata
                user_llm = await get_user_long_context_llm(
@ -475,7 +530,28 @@ async def index_google_calendar_events(
        logger.info(
            f"Final commit: Total {documents_indexed} Google Calendar events processed"
        )
-        await session.commit()
+        try:
+            await session.commit()
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same event was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - the rollback only drops changes pending in this commit; earlier commits persist
+            else:
+                raise
+
+        # Build warning message if duplicates were found
+        warning_message = None
+        if duplicate_content_count > 0:
+            warning_message = f"{duplicate_content_count} skipped (duplicate)"

        await task_logger.log_task_success(
            log_entry,
@ -484,14 +560,16 @@ async def index_google_calendar_events(
                "events_processed": total_processed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "duplicate_content_count": duplicate_content_count,
                "skipped_events_count": len(skipped_events),
            },
        )

        logger.info(
-            f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped"
+            f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
+            f"({duplicate_content_count} due to duplicate content from other connectors)"
        )
-        return total_processed, None
+        return total_processed, warning_message

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
@ -578,7 +578,7 @@ async def _check_rename_only_update(
        - (True, message): Only filename changed, document was updated
        - (False, None): Content changed or new file, needs full processing
    """
-    from sqlalchemy import select
+    from sqlalchemy import String, cast, select
    from sqlalchemy.orm.attributes import flag_modified

    from app.db import Document
@ -603,7 +603,8 @@ async def _check_rename_only_update(
            select(Document).where(
                Document.search_space_id == search_space_id,
                Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
-                Document.document_metadata["google_drive_file_id"].astext == file_id,
+                cast(Document.document_metadata["google_drive_file_id"], String)
+                == file_id,
            )
        )
        existing_document = result.scalar_one_or_none()
@ -755,7 +756,7 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:

    Handles both new (file_id-based) and legacy (filename-based) hash schemes.
    """
-    from sqlalchemy import select
+    from sqlalchemy import String, cast, select

    from app.db import Document

@ -774,7 +775,8 @@ async def _remove_document(session: AsyncSession, file_id: str, search_space_id:
            select(Document).where(
                Document.search_space_id == search_space_id,
                Document.document_type == DocumentType.GOOGLE_DRIVE_FILE,
-                Document.document_metadata["google_drive_file_id"].astext == file_id,
+                cast(Document.document_metadata["google_drive_file_id"], String)
+                == file_id,
            )
        )
        existing_document = result.scalar_one_or_none()
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@ -170,10 +170,21 @@ async def index_google_gmail_messages(
        )

        if error:
+            # Check if this is an authentication error that requires re-authentication
+            error_message = error
+            error_type = "APIError"
+            if (
+                "re-authenticate" in error.lower()
+                or "expired or been revoked" in error.lower()
+                or "authentication failed" in error.lower()
+            ):
+                error_message = "Gmail authentication failed. Please re-authenticate."
+                error_type = "AuthenticationError"
+
            await task_logger.log_task_failure(
-                log_entry, f"Failed to fetch messages: {error}", {}
+                log_entry, error_message, error, {"error_type": error_type}
            )
-            return 0, f"Failed to fetch Gmail messages: {error}"
+            return 0, error_message

        if not messages:
            success_msg = "No Google gmail messages found in the specified date range"
--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@ -126,10 +126,9 @@ async def index_jira_issues(
            )

            if error:
-                logger.error(f"Failed to get Jira issues: {error}")
-
                # Don't treat "No issues found" as an error that should stop indexing
                if "No issues found" in error:
+                    logger.info(f"No Jira issues found: {error}")
                    logger.info(
                        "No issues found is not a critical error, continuing with update"
                    )
@ -149,6 +148,7 @@ async def index_jira_issues(
                    )
                    return 0, None
                else:
+                    logger.error(f"Failed to get Jira issues: {error}")
                    await task_logger.log_task_failure(
                        log_entry,
                        f"Failed to get Jira issues: {error}",
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@ -145,10 +145,9 @@ async def index_linear_issues(
            )

            if error:
-                logger.error(f"Failed to get Linear issues: {error}")
-
                # Don't treat "No issues found" as an error that should stop indexing
                if "No issues found" in error:
+                    logger.info(f"No Linear issues found: {error}")
                    logger.info(
                        "No issues found is not a critical error, continuing with update"
                    )
@ -162,6 +161,7 @@ async def index_linear_issues(
                        )
                    return 0, None
                else:
+                    logger.error(f"Failed to get Linear issues: {error}")
                    return 0, f"Failed to get Linear issues: {error}"

            logger.info(f"Retrieved {len(issues)} issues from Linear API")
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@ -116,6 +116,13 @@ async def index_luma_events(

        luma_client = LumaConnector(api_key=api_key)

+        # Treat an 'undefined' or empty date string from the frontend as None.
+        # This prevents "time data 'undefined' does not match format" errors
+        if start_date == "undefined" or start_date == "":
+            start_date = None
+        if end_date == "undefined" or end_date == "":
+            end_date = None
+
        # Calculate date range
        # For calendar connectors, allow future dates to index upcoming events
        if start_date is None or end_date is None:
@ -172,10 +179,9 @@ async def index_luma_events(
            )

            if error:
-                logger.error(f"Failed to get Luma events: {error}")
-
                # Don't treat "No events found" as an error that should stop indexing
                if "No events found" in error or "no events" in error.lower():
+                    logger.info(f"No Luma events found: {error}")
                    logger.info(
                        "No events found is not a critical error, continuing with update"
                    )
@ -195,6 +201,7 @@ async def index_luma_events(
                    )
                    return 0, None
                else:
+                    logger.error(f"Failed to get Luma events: {error}")
                    await task_logger.log_task_failure(
                        log_entry,
                        f"Failed to get Luma events: {error}",
--- a/surfsense_backend/app/utils/connector_naming.py
+++ b/surfsense_backend/app/utils/connector_naming.py
@ -28,6 +28,9 @@ BASE_NAME_FOR_TYPE = {
    SearchSourceConnectorType.CONFLUENCE_CONNECTOR: "Confluence",
    SearchSourceConnectorType.AIRTABLE_CONNECTOR: "Airtable",
    SearchSourceConnectorType.MCP_CONNECTOR: "Model Context Protocol (MCP)",
+    SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR: "Gmail",
+    SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: "Google Drive",
+    SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: "Google Calendar",
 }


--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "surf-new-backend"
-version = "0.0.11"
+version = "0.0.12"
 description = "SurfSense Backend"
 requires-python = ">=3.12"
 dependencies = [
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
@ -6545,7 +6545,7 @@ wheels = [

 [[package]]
 name = "surf-new-backend"
-version = "0.0.11"
+version = "0.0.12"
 source = { editable = "." }
 dependencies = [
    { name = "alembic" },