commit 8301e0169c
Author: DESKTOP-RTLN3BA\$punk
Date:   2026-02-01 18:02:27 -08:00
71 changed files with 2889 additions and 732 deletions

View file

@ -32,7 +32,7 @@ PROVIDER_MAP = {
"GROQ": "groq",
"COHERE": "cohere",
"GOOGLE": "gemini",
"OLLAMA": "ollama",
"OLLAMA": "ollama_chat",
"MISTRAL": "mistral",
"AZURE_OPENAI": "azure",
"OPENROUTER": "openrouter",

View file

@ -79,6 +79,7 @@ celery_app = Celery(
"app.tasks.celery_tasks.schedule_checker_task",
"app.tasks.celery_tasks.blocknote_migration_tasks",
"app.tasks.celery_tasks.document_reindex_tasks",
"app.tasks.celery_tasks.stale_notification_cleanup_task",
],
)
@ -121,4 +122,14 @@ celery_app.conf.beat_schedule = {
"expires": 30, # Task expires after 30 seconds if not picked up
},
},
# Cleanup stale connector indexing notifications every 5 minutes
# This detects tasks that crashed or timed out without proper cleanup
# and marks their notifications as failed so users don't see perpetual "syncing"
"cleanup-stale-indexing-notifications": {
"task": "cleanup_stale_indexing_notifications",
"schedule": crontab(minute="*/5"), # Every 5 minutes
"options": {
"expires": 60, # Task expires after 60 seconds if not picked up
},
},
}
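
The cleanup_stale_indexing_notifications task scheduled above is registered elsewhere in this commit; as a hedged sketch, it pairs with the Redis heartbeat keys introduced in the connector routes further down (indexing:heartbeat:<notification_id>, written with a TTL), treating a missing key as a crashed or timed-out task. Helper shown for illustration only:

# Illustrative sketch only - not the task body from this commit.
import redis

def find_stale_notification_ids(notification_ids: list[int]) -> list[int]:
    """Return ids of in-progress notifications whose Redis heartbeat key
    (indexing:heartbeat:<id>) has expired, i.e. the indexing task stopped
    sending heartbeats without cleaning up after itself."""
    r = redis.from_url("redis://localhost:6379/0", decode_responses=True)
    return [nid for nid in notification_ids if not r.exists(f"indexing:heartbeat:{nid}")]

# The scheduled task would mark these notifications as failed instead of
# leaving them in a perpetual "syncing" state.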

View file

@ -5,6 +5,8 @@ Provides Gmail specific methods for data retrieval and indexing via Composio.
"""
import logging
import time
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from typing import Any
@ -26,6 +28,10 @@ from app.utils.document_converters import (
generate_unique_identifier_hash,
)
# Heartbeat configuration
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
HEARTBEAT_INTERVAL_SECONDS = 30
logger = logging.getLogger(__name__)
@ -427,6 +433,7 @@ async def index_composio_gmail(
log_entry,
update_last_indexed: bool = True,
max_items: int = 1000,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str]:
"""Index Gmail messages via Composio with pagination and incremental processing."""
try:
@ -471,8 +478,16 @@ async def index_composio_gmail(
total_documents_skipped = 0
total_messages_fetched = 0
result_size_estimate = None # Will be set from first API response
last_heartbeat_time = time.time()
while total_messages_fetched < max_items:
# Send heartbeat periodically to indicate task is still alive
if on_heartbeat_callback:
current_time = time.time()
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
await on_heartbeat_callback(total_documents_indexed)
last_heartbeat_time = current_time
# Calculate how many messages to fetch in this batch
remaining = max_items - total_messages_fetched
current_batch_size = min(batch_size, remaining)
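
HeartbeatCallbackType is simply an async callable that receives the running count of indexed documents; a minimal conforming callback (illustrative only) would look like this and be passed in as on_heartbeat_callback:

# Illustrative example of a callback satisfying HeartbeatCallbackType.
async def report_progress(indexed_count: int) -> None:
    # Invoked at most every HEARTBEAT_INTERVAL_SECONDS (30s) from the indexing loop.
    logger.info("Gmail indexing heartbeat: %d documents indexed so far", indexed_count)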

View file

@ -5,6 +5,8 @@ Provides Google Calendar specific methods for data retrieval and indexing via Composio.
"""
import logging
import time
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from typing import Any
@ -29,6 +31,10 @@ from app.utils.document_converters import (
generate_unique_identifier_hash,
)
# Heartbeat configuration
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
HEARTBEAT_INTERVAL_SECONDS = 30
logger = logging.getLogger(__name__)
@ -191,6 +197,7 @@ async def index_composio_google_calendar(
log_entry,
update_last_indexed: bool = True,
max_items: int = 2500,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str]:
"""Index Google Calendar events via Composio."""
try:
@ -262,8 +269,15 @@ async def index_composio_google_calendar(
duplicate_content_count = (
0 # Track events skipped due to duplicate content_hash
)
last_heartbeat_time = time.time()
for event in events:
# Send heartbeat periodically to indicate task is still alive
if on_heartbeat_callback:
current_time = time.time()
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time
try:
# Handle both standard Google API and potential Composio variations
event_id = event.get("id", "") or event.get("eventId", "")

View file

@ -9,6 +9,8 @@ import json
import logging
import os
import tempfile
import time
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
@ -29,6 +31,10 @@ from app.utils.document_converters import (
generate_unique_identifier_hash,
)
# Heartbeat configuration
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
HEARTBEAT_INTERVAL_SECONDS = 30
logger = logging.getLogger(__name__)
@ -552,7 +558,9 @@ def generate_indexing_settings_hash(
"include_subfolders": indexing_options.get("include_subfolders", True),
"max_files_per_folder": indexing_options.get("max_files_per_folder", 100),
}
return hashlib.md5(json.dumps(settings, sort_keys=True).encode()).hexdigest()
return hashlib.md5(
json.dumps(settings, sort_keys=True).encode(), usedforsecurity=False
).hexdigest()
async def index_composio_google_drive(
@ -565,6 +573,7 @@ async def index_composio_google_drive(
log_entry,
update_last_indexed: bool = True,
max_items: int = 1000,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int, str | None]:
"""Index Google Drive files via Composio with delta sync support.
@ -652,6 +661,7 @@ async def index_composio_google_drive(
max_items=max_items,
task_logger=task_logger,
log_entry=log_entry,
on_heartbeat_callback=on_heartbeat_callback,
)
else:
logger.info(
@ -684,6 +694,7 @@ async def index_composio_google_drive(
max_items=max_items,
task_logger=task_logger,
log_entry=log_entry,
on_heartbeat_callback=on_heartbeat_callback,
)
# Get new page token for next sync (always update after successful sync)
@ -765,6 +776,7 @@ async def _index_composio_drive_delta_sync(
max_items: int,
task_logger: TaskLoggingService,
log_entry,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int, list[str]]:
"""Index Google Drive files using delta sync (only changed files).
@ -774,6 +786,7 @@ async def _index_composio_drive_delta_sync(
documents_indexed = 0
documents_skipped = 0
processing_errors = []
last_heartbeat_time = time.time()
# Fetch all changes with pagination
all_changes = []
@ -804,6 +817,13 @@ async def _index_composio_drive_delta_sync(
logger.info(f"Processing {len(all_changes)} changes from delta sync")
for change in all_changes[:max_items]:
# Send heartbeat periodically to indicate task is still alive
if on_heartbeat_callback:
current_time = time.time()
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time
try:
# Handle removed files
is_removed = change.get("removed", False)
@ -886,11 +906,13 @@ async def _index_composio_drive_full_scan(
max_items: int,
task_logger: TaskLoggingService,
log_entry,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int, list[str]]:
"""Index Google Drive files using full scan (first sync or when no delta token)."""
documents_indexed = 0
documents_skipped = 0
processing_errors = []
last_heartbeat_time = time.time()
all_files = []
@ -1001,6 +1023,13 @@ async def _index_composio_drive_full_scan(
)
for file_info in all_files:
# Send heartbeat periodically to indicate task is still alive
if on_heartbeat_callback:
current_time = time.time()
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time
try:
# Handle both standard Google API and potential Composio variations
file_id = file_info.get("id", "") or file_info.get("fileId", "")

View file

@ -61,6 +61,9 @@ class DiscordConnector(commands.Bot):
self.token = None
self._bot_task = None # Holds the async bot task
self._is_running = False # Flag to track if the bot is running
self._start_called_event = (
asyncio.Event()
) # Event to signal when start() is called
# Event to confirm bot is ready
@self.event
@ -226,6 +229,9 @@ class DiscordConnector(commands.Bot):
)
return
# Signal that we're about to call start() - this allows _wait_until_ready() to proceed
self._start_called_event.set()
await self.start(self.token)
logger.info("Discord bot started successfully.")
except discord.LoginFailure:
@ -260,6 +266,9 @@ class DiscordConnector(commands.Bot):
else:
logger.info("Bot is not running or already disconnected.")
# Reset the start event so the connector can be reused
self._start_called_event.clear()
def set_token(self, token: str) -> None:
"""
Set the discord bot token (for backward compatibility).
@ -277,10 +286,16 @@ class DiscordConnector(commands.Bot):
"""Helper to wait until the bot is connected and ready."""
logger.info("Waiting for the bot to be ready...")
# Give the event loop a chance to switch to the bot's startup task.
# This allows self.start() to begin initializing the client.
# Terrible solution, but necessary to avoid blocking the event loop.
await asyncio.sleep(1) # Yield control to the event loop
# Wait for start_bot() to actually call self.start()
# This ensures we don't call wait_until_ready() before the client is initialized
try:
await asyncio.wait_for(self._start_called_event.wait(), timeout=30.0)
logger.info("Bot start() has been called, now waiting for ready state...")
except TimeoutError:
logger.error("start_bot() did not call start() within 30 seconds")
raise RuntimeError(
"Discord client failed to initialize - start() was never called"
) from None
try:
await asyncio.wait_for(self.wait_until_ready(), timeout=60.0)

View file

@ -252,12 +252,16 @@ class GoogleCalendarConnector:
if dt_start.tzinfo is None:
dt_start = dt_start.replace(hour=0, minute=0, second=0, tzinfo=pytz.UTC)
else:
dt_start = dt_start.astimezone(pytz.UTC).replace(hour=0, minute=0, second=0)
dt_start = dt_start.astimezone(pytz.UTC).replace(
hour=0, minute=0, second=0
)
if dt_end.tzinfo is None:
dt_end = dt_end.replace(hour=23, minute=59, second=59, tzinfo=pytz.UTC)
else:
dt_end = dt_end.astimezone(pytz.UTC).replace(hour=23, minute=59, second=59)
dt_end = dt_end.astimezone(pytz.UTC).replace(
hour=23, minute=59, second=59
)
if dt_start >= dt_end:
return [], (

View file

@ -132,6 +132,15 @@ async def get_valid_credentials(
await session.commit()
except Exception as e:
error_str = str(e)
# Check if this is an invalid_grant error (token expired/revoked)
if (
"invalid_grant" in error_str.lower()
or "token has been expired or revoked" in error_str.lower()
):
raise Exception(
"Google Drive authentication failed. Please re-authenticate."
) from e
raise Exception(f"Failed to refresh Google OAuth credentials: {e!s}") from e
return credentials

View file

@ -411,21 +411,6 @@ class NewChatThread(BaseModel, TimestampMixin):
index=True,
)
# Public sharing - cryptographic token for public URL access
public_share_token = Column(
String(64),
nullable=True,
unique=True,
index=True,
)
# Whether public sharing is currently enabled for this thread
public_share_enabled = Column(
Boolean,
nullable=False,
default=False,
server_default="false",
)
# Clone tracking - for audit and history bootstrap
cloned_from_thread_id = Column(
Integer,
@ -433,6 +418,12 @@ class NewChatThread(BaseModel, TimestampMixin):
nullable=True,
index=True,
)
cloned_from_snapshot_id = Column(
Integer,
ForeignKey("public_chat_snapshots.id", ondelete="SET NULL"),
nullable=True,
index=True,
)
cloned_at = Column(
TIMESTAMP(timezone=True),
nullable=True,
@ -444,13 +435,6 @@ class NewChatThread(BaseModel, TimestampMixin):
default=False,
server_default="false",
)
# Flag indicating content clone is pending (two-phase clone)
clone_pending = Column(
Boolean,
nullable=False,
default=False,
server_default="false",
)
# Relationships
search_space = relationship("SearchSpace", back_populates="new_chat_threads")
@ -461,6 +445,12 @@ class NewChatThread(BaseModel, TimestampMixin):
order_by="NewChatMessage.created_at",
cascade="all, delete-orphan",
)
snapshots = relationship(
"PublicChatSnapshot",
back_populates="thread",
cascade="all, delete-orphan",
foreign_keys="[PublicChatSnapshot.thread_id]",
)
class NewChatMessage(BaseModel, TimestampMixin):
@ -501,6 +491,65 @@ class NewChatMessage(BaseModel, TimestampMixin):
)
class PublicChatSnapshot(BaseModel, TimestampMixin):
"""
Immutable snapshot of a chat thread for public sharing.
Each snapshot is a frozen copy of the chat at a specific point in time.
The snapshot_data JSONB contains all messages and metadata needed to
render the public chat without querying the original thread.
"""
__tablename__ = "public_chat_snapshots"
# Link to original thread - CASCADE DELETE when thread is deleted
thread_id = Column(
Integer,
ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
# Public access token (unique URL identifier)
share_token = Column(
String(64),
nullable=False,
unique=True,
index=True,
)
content_hash = Column(
String(64),
nullable=False,
index=True,
)
snapshot_data = Column(JSONB, nullable=False)
message_ids = Column(ARRAY(Integer), nullable=False)
created_by_user_id = Column(
UUID(as_uuid=True),
ForeignKey("user.id", ondelete="SET NULL"),
nullable=True,
index=True,
)
# Relationships
thread = relationship(
"NewChatThread",
back_populates="snapshots",
foreign_keys="[PublicChatSnapshot.thread_id]",
)
created_by = relationship("User")
# Constraints
__table_args__ = (
# Prevent duplicate snapshots of the same content for the same thread
UniqueConstraint("thread_id", "content_hash", name="uq_snapshot_thread_content_hash"),
)
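
The (thread_id, content_hash) unique constraint backs the deduplication described in the snapshot endpoints below: creating a snapshot of unchanged content should return the existing row rather than insert a new one. A hedged sketch of that lookup, assuming an async session and the usual SQLAlchemy select import (illustrative, not necessarily the commit's exact implementation):

# Illustrative dedup lookup before inserting a new snapshot.
existing = (
    await session.execute(
        select(PublicChatSnapshot).where(
            PublicChatSnapshot.thread_id == thread_id,
            PublicChatSnapshot.content_hash == content_hash,
        )
    )
).scalars().first()
if existing is not None:
    return existing  # same content -> reuse the same share_token / public URL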
class ChatComment(BaseModel, TimestampMixin):
"""
Comment model for comments on AI chat responses.

View file

@ -442,11 +442,24 @@ async def refresh_airtable_token(
if token_response.status_code != 200:
error_detail = token_response.text
error_code = ""
try:
error_json = token_response.json()
error_detail = error_json.get("error_description", error_detail)
error_code = error_json.get("error", "")
except Exception:
pass
# Check if this is a token expiration/revocation error
error_lower = (error_detail + error_code).lower()
if (
"invalid_grant" in error_lower
or "expired" in error_lower
or "revoked" in error_lower
):
raise HTTPException(
status_code=401,
detail="Airtable authentication failed. Please re-authenticate.",
)
raise HTTPException(
status_code=400, detail=f"Token refresh failed: {error_detail}"
)
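
The expired/revoked classification added here recurs nearly verbatim in the ClickUp, Confluence, Jira, Linear, Notion, Slack, and Teams refresh routes below; a hedged sketch of a shared helper that could express the same check once (hypothetical, not part of this commit):

def refresh_error_status(error_detail: str, error_code: str = "") -> int:
    """Return 401 for expired/revoked grants (re-authentication needed), else 400."""
    error_lower = (error_detail + error_code).lower()
    if any(term in error_lower for term in ("invalid_grant", "expired", "revoked")):
        return 401
    return 400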

View file

@ -417,6 +417,17 @@ async def refresh_clickup_token(
error_detail = error_json.get("error", error_detail)
except Exception:
pass
# Check if this is a token expiration/revocation error
error_lower = error_detail.lower()
if (
"invalid_grant" in error_lower
or "expired" in error_lower
or "revoked" in error_lower
):
raise HTTPException(
status_code=401,
detail="ClickUp authentication failed. Please re-authenticate.",
)
raise HTTPException(
status_code=400, detail=f"Token refresh failed: {error_detail}"
)

View file

@ -428,13 +428,26 @@ async def refresh_confluence_token(
if token_response.status_code != 200:
error_detail = token_response.text
error_code = ""
try:
error_json = token_response.json()
error_detail = error_json.get(
"error_description", error_json.get("error", error_detail)
)
error_code = error_json.get("error", "")
except Exception:
pass
# Check if this is a token expiration/revocation error
error_lower = (error_detail + error_code).lower()
if (
"invalid_grant" in error_lower
or "expired" in error_lower
or "revoked" in error_lower
):
raise HTTPException(
status_code=401,
detail="Confluence authentication failed. Please re-authenticate.",
)
raise HTTPException(
status_code=400, detail=f"Token refresh failed: {error_detail}"
)

View file

@ -46,6 +46,11 @@ SCOPES = [
"guilds.members.read", # Read member information
]
# Discord permission bits
VIEW_CHANNEL = 1 << 10 # 1024
READ_MESSAGE_HISTORY = 1 << 16 # 65536
ADMINISTRATOR = 1 << 3 # 8
# Initialize security utilities
_state_manager = None
_token_encryption = None
@ -531,3 +536,296 @@ async def refresh_discord_token(
raise HTTPException(
status_code=500, detail=f"Failed to refresh Discord tokens: {e!s}"
) from e
def _compute_channel_permissions(
base_permissions: int,
bot_role_ids: set[str],
bot_user_id: str | None,
channel_overwrites: list[dict],
guild_id: str,
) -> int:
"""
Compute effective permissions for a channel based on role permissions and overwrites.
Discord permission computation follows this order (per official docs):
1. Start with base permissions from roles
2. Apply @everyone role overwrites (deny, then allow)
3. Apply role-specific overwrites (deny, then allow)
4. Apply member-specific overwrites (deny, then allow)
Args:
base_permissions: Combined permissions from all bot roles
bot_role_ids: Set of role IDs the bot has
bot_user_id: The bot's user ID for member-specific overwrites
channel_overwrites: List of permission overwrites for the channel
guild_id: Guild ID (same as @everyone role ID)
Returns:
Computed permission integer
"""
permissions = base_permissions
# Permission overwrites are applied in order: @everyone, roles, member
everyone_allow = 0
everyone_deny = 0
role_allow = 0
role_deny = 0
member_allow = 0
member_deny = 0
for overwrite in channel_overwrites:
overwrite_id = overwrite.get("id")
overwrite_type = overwrite.get("type") # 0 = role, 1 = member
allow = int(overwrite.get("allow", 0))
deny = int(overwrite.get("deny", 0))
if overwrite_type == 0: # Role overwrite
if overwrite_id == guild_id: # @everyone role
everyone_allow = allow
everyone_deny = deny
elif overwrite_id in bot_role_ids:
role_allow |= allow
role_deny |= deny
elif overwrite_type == 1 and bot_user_id and overwrite_id == bot_user_id:
# Member-specific overwrite for the bot
member_allow = allow
member_deny = deny
# Apply in order per Discord docs:
# 1. @everyone deny, then allow
permissions &= ~everyone_deny
permissions |= everyone_allow
# 2. Role deny, then allow
permissions &= ~role_deny
permissions |= role_allow
# 3. Member deny, then allow (applied LAST, highest priority)
permissions &= ~member_deny
permissions |= member_allow
return permissions
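
For illustration, here is how the helper composes with the permission bits defined above; the ids and overwrites are invented, and allow/deny values are given as strings because Discord's API returns them that way (the helper casts them with int()):

overwrites = [
    # @everyone is denied VIEW_CHANNEL on this channel...
    {"id": "guild123", "type": 0, "allow": "0", "deny": str(VIEW_CHANNEL)},
    # ...but a role the bot holds re-grants view + history.
    {"id": "role456", "type": 0, "allow": str(VIEW_CHANNEL | READ_MESSAGE_HISTORY), "deny": "0"},
]
perms = _compute_channel_permissions(
    base_permissions=READ_MESSAGE_HISTORY,
    bot_role_ids={"role456"},
    bot_user_id="bot789",
    channel_overwrites=overwrites,
    guild_id="guild123",
)
can_index = bool(perms & VIEW_CHANNEL) and bool(perms & READ_MESSAGE_HISTORY)  # True here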
@router.get("/discord/connector/{connector_id}/channels", response_model=None)
async def get_discord_channels(
connector_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Get list of Discord text channels for a connector with permission info.
Uses Discord's HTTP REST API directly instead of WebSocket bot connection.
Computes effective permissions to determine if bot can read message history.
Args:
connector_id: The Discord connector ID
session: Database session
user: Current authenticated user
Returns:
List of channels with id, name, type, position, category_id, and can_index fields
"""
from sqlalchemy import select
try:
# Get connector and verify ownership
result = await session.execute(
select(SearchSourceConnector).where(
SearchSourceConnector.id == connector_id,
SearchSourceConnector.user_id == user.id,
SearchSourceConnector.connector_type
== SearchSourceConnectorType.DISCORD_CONNECTOR,
)
)
connector = result.scalar_one_or_none()
if not connector:
raise HTTPException(
status_code=404,
detail="Discord connector not found or access denied",
)
# Get credentials and decrypt bot token
credentials = DiscordAuthCredentialsBase.from_dict(connector.config)
token_encryption = get_token_encryption()
is_encrypted = connector.config.get("_token_encrypted", False)
bot_token = credentials.bot_token
if is_encrypted and bot_token:
try:
bot_token = token_encryption.decrypt_token(bot_token)
except Exception as e:
logger.error(f"Failed to decrypt bot token: {e!s}")
raise HTTPException(
status_code=500, detail="Failed to decrypt stored bot token"
) from e
if not bot_token:
raise HTTPException(
status_code=400,
detail="No bot token available. Please re-authenticate.",
)
# Get guild_id from connector config
guild_id = connector.config.get("guild_id")
if not guild_id:
raise HTTPException(
status_code=400,
detail="No guild_id associated with this connector. Please reconnect the Discord server.",
)
headers = {"Authorization": f"Bot {bot_token}"}
async with httpx.AsyncClient() as client:
# Fetch bot's user info to get bot user ID
bot_user_response = await client.get(
"https://discord.com/api/v10/users/@me",
headers=headers,
timeout=30.0,
)
if bot_user_response.status_code != 200:
logger.warning(
f"Failed to fetch bot user info: {bot_user_response.text}"
)
bot_user_id = None
else:
bot_user_id = bot_user_response.json().get("id")
# Fetch guild info to get roles
guild_response = await client.get(
f"https://discord.com/api/v10/guilds/{guild_id}",
headers=headers,
timeout=30.0,
)
if guild_response.status_code != 200:
raise HTTPException(
status_code=guild_response.status_code,
detail="Failed to fetch guild information",
)
guild_data = guild_response.json()
guild_roles = {role["id"]: role for role in guild_data.get("roles", [])}
# Fetch bot's member info to get its roles
bot_member_response = await client.get(
f"https://discord.com/api/v10/guilds/{guild_id}/members/{bot_user_id}",
headers=headers,
timeout=30.0,
)
if bot_member_response.status_code != 200:
logger.warning(
f"Failed to fetch bot member info: {bot_member_response.text}"
)
bot_role_ids = {guild_id} # At minimum, bot has @everyone role
base_permissions = int(
guild_roles.get(guild_id, {}).get("permissions", 0)
)
else:
bot_member_data = bot_member_response.json()
bot_role_ids = set(bot_member_data.get("roles", []))
bot_role_ids.add(guild_id) # @everyone role is always included
# Compute base permissions from all bot roles
base_permissions = 0
for role_id in bot_role_ids:
if role_id in guild_roles:
role_perms = int(guild_roles[role_id].get("permissions", 0))
base_permissions |= role_perms
# Check if bot has administrator permission (bypasses all checks)
is_admin = (base_permissions & ADMINISTRATOR) == ADMINISTRATOR
# Fetch channels
channels_response = await client.get(
f"https://discord.com/api/v10/guilds/{guild_id}/channels",
headers=headers,
timeout=30.0,
)
if channels_response.status_code == 403:
raise HTTPException(
status_code=403,
detail="Bot does not have permission to view channels in this server. Please ensure the bot has the 'View Channels' permission.",
)
elif channels_response.status_code == 404:
raise HTTPException(
status_code=404,
detail="Discord server not found. The bot may have been removed from the server.",
)
elif channels_response.status_code != 200:
error_detail = channels_response.text
try:
error_json = channels_response.json()
error_detail = error_json.get("message", error_detail)
except Exception:
pass
raise HTTPException(
status_code=channels_response.status_code,
detail=f"Failed to fetch Discord channels: {error_detail}",
)
channels_data = channels_response.json()
# Discord channel types:
# 0 = GUILD_TEXT, 2 = GUILD_VOICE, 4 = GUILD_CATEGORY, 5 = GUILD_ANNOUNCEMENT
# We want text channels (type 0) and announcement channels (type 5)
text_channel_types = {0, 5}
text_channels = []
for ch in channels_data:
if ch.get("type") in text_channel_types:
# Compute effective permissions for this channel
if is_admin:
# Administrators bypass all permission checks
can_index = True
else:
channel_overwrites = ch.get("permission_overwrites", [])
effective_perms = _compute_channel_permissions(
base_permissions,
bot_role_ids,
bot_user_id,
channel_overwrites,
guild_id,
)
# Bot can index if it has both VIEW_CHANNEL and READ_MESSAGE_HISTORY
has_view = (effective_perms & VIEW_CHANNEL) == VIEW_CHANNEL
has_read_history = (
effective_perms & READ_MESSAGE_HISTORY
) == READ_MESSAGE_HISTORY
can_index = has_view and has_read_history
text_channels.append(
{
"id": ch["id"],
"name": ch["name"],
"type": "text" if ch["type"] == 0 else "announcement",
"position": ch.get("position", 0),
"category_id": ch.get("parent_id"),
"can_index": can_index,
}
)
# Sort by position
text_channels.sort(key=lambda x: x["position"])
logger.info(
f"Fetched {len(text_channels)} text channels for Discord connector {connector_id}"
)
return text_channels
except HTTPException:
raise
except Exception as e:
logger.error(
f"Failed to get Discord channels for connector {connector_id}: {e!s}",
exc_info=True,
)
raise HTTPException(
status_code=500, detail=f"Failed to get Discord channels: {e!s}"
) from e
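
For reference, each entry the endpoint returns has this shape (values invented):

{
    "id": "123456789012345678",
    "name": "general",
    "type": "text",  # or "announcement" for type-5 channels
    "position": 0,
    "category_id": None,
    "can_index": True,
}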

View file

@ -446,13 +446,26 @@ async def refresh_jira_token(
if token_response.status_code != 200:
error_detail = token_response.text
error_code = ""
try:
error_json = token_response.json()
error_detail = error_json.get(
"error_description", error_json.get("error", error_detail)
)
error_code = error_json.get("error", "")
except Exception:
pass
# Check if this is a token expiration/revocation error
error_lower = (error_detail + error_code).lower()
if (
"invalid_grant" in error_lower
or "expired" in error_lower
or "revoked" in error_lower
):
raise HTTPException(
status_code=401,
detail="Jira authentication failed. Please re-authenticate.",
)
raise HTTPException(
status_code=400, detail=f"Token refresh failed: {error_detail}"
)

View file

@ -403,11 +403,24 @@ async def refresh_linear_token(
if token_response.status_code != 200:
error_detail = token_response.text
error_code = ""
try:
error_json = token_response.json()
error_detail = error_json.get("error_description", error_detail)
error_code = error_json.get("error", "")
except Exception:
pass
# Check if this is a token expiration/revocation error
error_lower = (error_detail + error_code).lower()
if (
"invalid_grant" in error_lower
or "expired" in error_lower
or "revoked" in error_lower
):
raise HTTPException(
status_code=401,
detail="Linear authentication failed. Please re-authenticate.",
)
raise HTTPException(
status_code=400, detail=f"Token refresh failed: {error_detail}"
)

View file

@ -37,7 +37,6 @@ from app.db import (
get_async_session,
)
from app.schemas.new_chat import (
CompleteCloneResponse,
NewChatMessageAppend,
NewChatMessageRead,
NewChatRequest,
@ -46,14 +45,13 @@ from app.schemas.new_chat import (
NewChatThreadUpdate,
NewChatThreadVisibilityUpdate,
NewChatThreadWithMessages,
PublicShareToggleRequest,
PublicShareToggleResponse,
RegenerateRequest,
SnapshotCreateResponse,
SnapshotListResponse,
ThreadHistoryLoadResponse,
ThreadListItem,
ThreadListResponse,
)
from app.services.public_chat_service import toggle_public_share
from app.tasks.chat.stream_new_chat import stream_new_chat
from app.users import current_active_user
from app.utils.rbac import check_permission
@ -219,7 +217,6 @@ async def list_threads(
visibility=thread.visibility,
created_by_id=thread.created_by_id,
is_own_thread=is_own_thread,
public_share_enabled=thread.public_share_enabled,
created_at=thread.created_at,
updated_at=thread.updated_at,
)
@ -321,7 +318,6 @@ async def search_threads(
thread.created_by_id == user.id
or (thread.created_by_id is None and is_search_space_owner)
),
public_share_enabled=thread.public_share_enabled,
created_at=thread.created_at,
updated_at=thread.updated_at,
)
@ -670,66 +666,6 @@ async def delete_thread(
) from None
@router.post(
"/threads/{thread_id}/complete-clone", response_model=CompleteCloneResponse
)
async def complete_clone(
thread_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Complete the cloning process for a thread.
Copies messages and podcasts from the source thread.
Sets clone_pending=False and needs_history_bootstrap=True when done.
Requires authentication and ownership of the thread.
"""
from app.services.public_chat_service import complete_clone_content
try:
result = await session.execute(
select(NewChatThread).filter(NewChatThread.id == thread_id)
)
thread = result.scalars().first()
if not thread:
raise HTTPException(status_code=404, detail="Thread not found")
if thread.created_by_id != user.id:
raise HTTPException(status_code=403, detail="Not authorized")
if not thread.clone_pending:
raise HTTPException(status_code=400, detail="Clone already completed")
if not thread.cloned_from_thread_id:
raise HTTPException(
status_code=400, detail="No source thread to clone from"
)
message_count = await complete_clone_content(
session=session,
target_thread=thread,
source_thread_id=thread.cloned_from_thread_id,
target_search_space_id=thread.search_space_id,
)
return CompleteCloneResponse(
status="success",
message_count=message_count,
)
except HTTPException:
raise
except Exception as e:
await session.rollback()
raise HTTPException(
status_code=500,
detail=f"An unexpected error occurred while completing clone: {e!s}",
) from None
@router.patch("/threads/{thread_id}/visibility", response_model=NewChatThreadRead)
async def update_thread_visibility(
thread_id: int,
@ -795,32 +731,83 @@ async def update_thread_visibility(
) from None
@router.patch(
"/threads/{thread_id}/public-share", response_model=PublicShareToggleResponse
)
async def update_thread_public_share(
# =============================================================================
# Snapshot Endpoints
# =============================================================================
@router.post("/threads/{thread_id}/snapshots", response_model=SnapshotCreateResponse)
async def create_thread_snapshot(
thread_id: int,
request: Request,
toggle_request: PublicShareToggleRequest,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Enable or disable public sharing for a thread.
Create a public snapshot of the thread.
Only the creator of the thread can manage public sharing.
When enabled, returns a public URL that anyone can use to view the chat.
Returns existing snapshot URL if content unchanged (deduplication).
Only the thread owner can create snapshots.
"""
from app.services.public_chat_service import create_snapshot
base_url = str(request.base_url).rstrip("/")
return await toggle_public_share(
return await create_snapshot(
session=session,
thread_id=thread_id,
enabled=toggle_request.enabled,
user=user,
base_url=base_url,
)
@router.get("/threads/{thread_id}/snapshots", response_model=SnapshotListResponse)
async def list_thread_snapshots(
thread_id: int,
request: Request,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
List all public snapshots for this thread.
Only the thread owner can view snapshots.
"""
from app.services.public_chat_service import list_snapshots_for_thread
base_url = str(request.base_url).rstrip("/")
return SnapshotListResponse(
snapshots=await list_snapshots_for_thread(
session=session,
thread_id=thread_id,
user=user,
base_url=base_url,
)
)
@router.delete("/threads/{thread_id}/snapshots/{snapshot_id}")
async def delete_thread_snapshot(
thread_id: int,
snapshot_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Delete a specific snapshot.
Only the thread owner can delete snapshots.
"""
from app.services.public_chat_service import delete_snapshot
await delete_snapshot(
session=session,
thread_id=thread_id,
snapshot_id=snapshot_id,
user=user,
)
return {"message": "Snapshot deleted successfully"}
# =============================================================================
# Message Endpoints
# =============================================================================
@ -1286,6 +1273,8 @@ async def regenerate_response(
.limit(2)
)
messages_to_delete = list(last_messages_result.scalars().all())
message_ids_to_delete = [msg.id for msg in messages_to_delete]
# Get search space for LLM config
search_space_result = await session.execute(
@ -1329,6 +1318,15 @@ async def regenerate_response(
for msg in messages_to_delete:
await session.delete(msg)
await session.commit()
# Delete any public snapshots that contain the modified messages
from app.services.public_chat_service import (
delete_affected_snapshots,
)
await delete_affected_snapshots(
session, thread_id, message_ids_to_delete
)
except Exception as cleanup_error:
# Log but don't fail - the new messages are already streamed
print(

View file

@ -407,11 +407,24 @@ async def refresh_notion_token(
if token_response.status_code != 200:
error_detail = token_response.text
error_code = ""
try:
error_json = token_response.json()
error_detail = error_json.get("error_description", error_detail)
error_code = error_json.get("error", "")
except Exception:
pass
# Check if this is a token expiration/revocation error
error_lower = (error_detail + error_code).lower()
if (
"invalid_grant" in error_lower
or "expired" in error_lower
or "revoked" in error_lower
):
raise HTTPException(
status_code=401,
detail="Notion authentication failed. Please re-authenticate.",
)
raise HTTPException(
status_code=400, detail=f"Token refresh failed: {error_detail}"
)

View file

@ -23,7 +23,7 @@ from app.db import (
get_async_session,
)
from app.schemas import PodcastRead
from app.users import current_active_user, current_optional_user
from app.users import current_active_user
from app.utils.rbac import check_permission
router = APIRouter()
@ -82,17 +82,14 @@ async def read_podcasts(
async def read_podcast(
podcast_id: int,
session: AsyncSession = Depends(get_async_session),
user: User | None = Depends(current_optional_user),
user: User = Depends(current_active_user),
):
"""
Get a specific podcast by ID.
Access is allowed if:
- User is authenticated with PODCASTS_READ permission, OR
- Podcast belongs to a publicly shared thread
Requires authentication with PODCASTS_READ permission.
For public podcast access, use /public/{share_token}/podcasts/{podcast_id}/stream
"""
from app.services.public_chat_service import is_podcast_publicly_accessible
try:
result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
podcast = result.scalars().first()
@ -103,18 +100,13 @@ async def read_podcast(
detail="Podcast not found",
)
is_public = await is_podcast_publicly_accessible(session, podcast_id)
if not is_public:
if not user:
raise HTTPException(status_code=401, detail="Authentication required")
await check_permission(
session,
user,
podcast.search_space_id,
Permission.PODCASTS_READ.value,
"You don't have permission to read podcasts in this search space",
)
await check_permission(
session,
user,
podcast.search_space_id,
Permission.PODCASTS_READ.value,
"You don't have permission to read podcasts in this search space",
)
return PodcastRead.from_orm_with_entries(podcast)
except HTTPException as he:
@ -168,19 +160,16 @@ async def delete_podcast(
async def stream_podcast(
podcast_id: int,
session: AsyncSession = Depends(get_async_session),
user: User | None = Depends(current_optional_user),
user: User = Depends(current_active_user),
):
"""
Stream a podcast audio file.
Access is allowed if:
- User is authenticated with PODCASTS_READ permission, OR
- Podcast belongs to a publicly shared thread
Requires authentication with PODCASTS_READ permission.
For public podcast access, use /public/{share_token}/podcasts/{podcast_id}/stream
Note: Both /stream and /audio endpoints are supported for compatibility.
"""
from app.services.public_chat_service import is_podcast_publicly_accessible
try:
result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
podcast = result.scalars().first()
@ -188,19 +177,13 @@ async def stream_podcast(
if not podcast:
raise HTTPException(status_code=404, detail="Podcast not found")
is_public = await is_podcast_publicly_accessible(session, podcast_id)
if not is_public:
if not user:
raise HTTPException(status_code=401, detail="Authentication required")
await check_permission(
session,
user,
podcast.search_space_id,
Permission.PODCASTS_READ.value,
"You don't have permission to access podcasts in this search space",
)
await check_permission(
session,
user,
podcast.search_space_id,
Permission.PODCASTS_READ.value,
"You don't have permission to access podcasts in this search space",
)
file_path = podcast.file_location

View file

@ -1,21 +1,25 @@
"""
Routes for public chat access (unauthenticated and mixed-auth endpoints).
Routes for public chat access via immutable snapshots.
All public endpoints use share_token for access - no authentication required
for read operations. Clone requires authentication.
"""
from datetime import UTC, datetime
import os
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import ChatVisibility, NewChatThread, User, get_async_session
from app.db import User, get_async_session
from app.schemas.new_chat import (
CloneInitResponse,
CloneResponse,
PublicChatResponse,
)
from app.services.public_chat_service import (
clone_from_snapshot,
get_public_chat,
get_thread_by_share_token,
get_user_default_search_space,
get_snapshot_podcast,
)
from app.users import current_active_user
@ -28,57 +32,85 @@ async def read_public_chat(
session: AsyncSession = Depends(get_async_session),
):
"""
Get a public chat by share token.
Get a public chat snapshot by share token.
No authentication required.
Returns sanitized content (citations stripped).
Returns immutable snapshot data (sanitized, citations stripped).
"""
return await get_public_chat(session, share_token)
@router.post("/{share_token}/clone", response_model=CloneInitResponse)
async def clone_public_chat_endpoint(
@router.post("/{share_token}/clone", response_model=CloneResponse)
async def clone_public_chat(
share_token: str,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Initialize cloning a public chat to the user's account.
Creates an empty thread with clone_pending=True.
Frontend should redirect to the new thread and call /complete-clone.
Clone a public chat snapshot to the user's account.
Creates thread and copies messages.
Requires authentication.
"""
source_thread = await get_thread_by_share_token(session, share_token)
return await clone_from_snapshot(session, share_token, user)
if not source_thread:
raise HTTPException(
status_code=404, detail="Chat not found or no longer public"
)
target_search_space_id = await get_user_default_search_space(session, user.id)
@router.get("/{share_token}/podcasts/{podcast_id}")
async def get_public_podcast(
share_token: str,
podcast_id: int,
session: AsyncSession = Depends(get_async_session),
):
"""
Get podcast details from a public chat snapshot.
if target_search_space_id is None:
raise HTTPException(status_code=400, detail="No search space found for user")
No authentication required - the share_token provides access.
Returns podcast info including transcript.
"""
podcast_info = await get_snapshot_podcast(session, share_token, podcast_id)
new_thread = NewChatThread(
title=source_thread.title,
archived=False,
visibility=ChatVisibility.PRIVATE,
search_space_id=target_search_space_id,
created_by_id=user.id,
public_share_enabled=False,
cloned_from_thread_id=source_thread.id,
cloned_at=datetime.now(UTC),
clone_pending=True,
)
session.add(new_thread)
await session.commit()
await session.refresh(new_thread)
return CloneInitResponse(
thread_id=new_thread.id,
search_space_id=target_search_space_id,
share_token=share_token,
if not podcast_info:
raise HTTPException(status_code=404, detail="Podcast not found")
return {
"id": podcast_info.get("original_id"),
"title": podcast_info.get("title"),
"status": "ready",
"podcast_transcript": podcast_info.get("transcript"),
}
@router.get("/{share_token}/podcasts/{podcast_id}/stream")
async def stream_public_podcast(
share_token: str,
podcast_id: int,
session: AsyncSession = Depends(get_async_session),
):
"""
Stream a podcast from a public chat snapshot.
No authentication required - the share_token provides access.
Looks up podcast by original_id in the snapshot's podcasts array.
"""
podcast_info = await get_snapshot_podcast(session, share_token, podcast_id)
if not podcast_info:
raise HTTPException(status_code=404, detail="Podcast not found")
file_path = podcast_info.get("file_path")
if not file_path or not os.path.isfile(file_path):
raise HTTPException(status_code=404, detail="Podcast audio file not found")
def iterfile():
with open(file_path, mode="rb") as file_like:
yield from file_like
return StreamingResponse(
iterfile(),
media_type="audio/mpeg",
headers={
"Accept-Ranges": "bytes",
"Content-Disposition": f"inline; filename={os.path.basename(file_path)}",
},
)

View file

@ -19,10 +19,12 @@ Non-OAuth connectors (BookStack, GitHub, etc.) are limited to one per search space
"""
import logging
import os
from datetime import UTC, datetime, timedelta
from typing import Any
import pytz
import redis
from dateutil.parser import isoparse
from fastapi import APIRouter, Body, Depends, HTTPException, Query
from pydantic import BaseModel, Field, ValidationError
@ -78,6 +80,27 @@ from app.utils.rbac import check_permission
# Set up logging
logger = logging.getLogger(__name__)
# Redis client for heartbeat tracking
_heartbeat_redis_client: redis.Redis | None = None
# Redis key TTL - notification is stale if no heartbeat in this time
HEARTBEAT_TTL_SECONDS = 120 # 2 minutes
def get_heartbeat_redis_client() -> redis.Redis:
"""Get or create Redis client for heartbeat tracking."""
global _heartbeat_redis_client
if _heartbeat_redis_client is None:
redis_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
_heartbeat_redis_client = redis.from_url(redis_url, decode_responses=True)
return _heartbeat_redis_client
def _get_heartbeat_key(notification_id: int) -> str:
"""Generate Redis key for notification heartbeat."""
return f"indexing:heartbeat:{notification_id}"
router = APIRouter()
@ -1137,6 +1160,7 @@ async def run_slack_indexing(
end_date=end_date,
indexing_function=index_slack_messages,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1150,6 +1174,7 @@ async def _run_indexing_with_notifications(
indexing_function,
update_timestamp_func=None,
supports_retry_callback: bool = False,
supports_heartbeat_callback: bool = False,
):
"""
Generic helper to run indexing with real-time notifications.
@ -1164,11 +1189,14 @@ async def _run_indexing_with_notifications(
indexing_function: Async function that performs the indexing
update_timestamp_func: Optional function to update connector timestamp
supports_retry_callback: Whether the indexing function supports on_retry_callback
supports_heartbeat_callback: Whether the indexing function supports on_heartbeat_callback
"""
from uuid import UUID
from celery.exceptions import SoftTimeLimitExceeded
notification = None
# Track indexed count for retry notifications
# Track indexed count for retry notifications and heartbeat
current_indexed_count = 0
try:
@ -1195,6 +1223,16 @@ async def _run_indexing_with_notifications(
)
)
# Set initial Redis heartbeat for stale detection
if notification:
try:
heartbeat_key = _get_heartbeat_key(notification.id)
get_heartbeat_redis_client().setex(
heartbeat_key, HEARTBEAT_TTL_SECONDS, "0"
)
except Exception as e:
logger.warning(f"Failed to set initial Redis heartbeat: {e}")
# Update notification to fetching stage
if notification:
await NotificationService.connector_indexing.notify_indexing_progress(
@ -1227,6 +1265,40 @@ async def _run_indexing_with_notifications(
# Don't let notification errors break the indexing
logger.warning(f"Failed to update retry notification: {e}")
# Create heartbeat callback for connectors that support it
# This updates the notification periodically during long-running indexing loops
# to prevent the task from appearing stuck if the worker crashes
async def on_heartbeat_callback(indexed_count: int) -> None:
"""Callback to update notification during indexing (heartbeat)."""
nonlocal notification, current_indexed_count
current_indexed_count = indexed_count
if notification:
try:
# Set Redis heartbeat key with TTL (fast, for stale detection)
heartbeat_key = _get_heartbeat_key(notification.id)
get_heartbeat_redis_client().setex(
heartbeat_key, HEARTBEAT_TTL_SECONDS, str(indexed_count)
)
except Exception as e:
# Don't let Redis errors break the indexing
logger.warning(f"Failed to set Redis heartbeat: {e}")
try:
# Still update DB notification for progress display
await session.refresh(notification)
await (
NotificationService.connector_indexing.notify_indexing_progress(
session=session,
notification=notification,
indexed_count=indexed_count,
stage="processing",
)
)
await session.commit()
except Exception as e:
# Don't let notification errors break the indexing
logger.warning(f"Failed to update heartbeat notification: {e}")
# Build kwargs for indexing function
indexing_kwargs = {
"session": session,
@ -1242,6 +1314,10 @@ async def _run_indexing_with_notifications(
if supports_retry_callback:
indexing_kwargs["on_retry_callback"] = on_retry_callback
# Add heartbeat callback for connectors that support it
if supports_heartbeat_callback:
indexing_kwargs["on_heartbeat_callback"] = on_heartbeat_callback
# Run the indexing function
# Some indexers return (indexed, error), others return (indexed, skipped, error)
result = await indexing_function(**indexing_kwargs)
@ -1398,6 +1474,32 @@ async def _run_indexing_with_notifications(
await (
session.commit()
) # Commit to ensure Electric SQL syncs the notification update
except SoftTimeLimitExceeded:
# Celery soft time limit was reached - task is about to be killed
# Gracefully save progress and mark as interrupted
logger.warning(
f"Soft time limit reached for connector {connector_id}. "
f"Saving partial progress: {current_indexed_count} items indexed."
)
if notification:
try:
await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=current_indexed_count,
error_message="Time limit reached. Partial sync completed. Please run again for remaining items.",
is_warning=True, # Mark as warning since partial data was indexed
)
await session.commit()
except Exception as notif_error:
logger.error(
f"Failed to update notification on soft timeout: {notif_error!s}"
)
# Re-raise so Celery knows the task was terminated
raise
except Exception as e:
logger.error(f"Error in indexing task: {e!s}", exc_info=True)
@ -1409,12 +1511,20 @@ async def _run_indexing_with_notifications(
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=0,
indexed_count=current_indexed_count, # Use tracked count, not 0
error_message=str(e),
skipped_count=None, # Unknown on exception
)
except Exception as notif_error:
logger.error(f"Failed to update notification: {notif_error!s}")
finally:
# Clean up Redis heartbeat key when task completes (success or failure)
if notification:
try:
heartbeat_key = _get_heartbeat_key(notification.id)
get_heartbeat_redis_client().delete(heartbeat_key)
except Exception:
pass # Ignore cleanup errors - key will expire anyway
async def run_notion_indexing_with_new_session(
@ -1439,6 +1549,7 @@ async def run_notion_indexing_with_new_session(
indexing_function=index_notion_pages,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_retry_callback=True, # Notion connector supports retry notifications
supports_heartbeat_callback=True, # Notion connector supports heartbeat notifications
)
@ -1471,6 +1582,7 @@ async def run_notion_indexing(
indexing_function=index_notion_pages,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_retry_callback=True, # Notion connector supports retry notifications
supports_heartbeat_callback=True, # Notion connector supports heartbeat notifications
)
@ -1521,6 +1633,7 @@ async def run_github_indexing(
end_date=end_date,
indexing_function=index_github_repos,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1571,6 +1684,7 @@ async def run_linear_indexing(
end_date=end_date,
indexing_function=index_linear_issues,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1620,6 +1734,7 @@ async def run_discord_indexing(
end_date=end_date,
indexing_function=index_discord_messages,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1670,6 +1785,7 @@ async def run_teams_indexing(
end_date=end_date,
indexing_function=index_teams_messages,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1720,6 +1836,7 @@ async def run_jira_indexing(
end_date=end_date,
indexing_function=index_jira_issues,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1772,6 +1889,7 @@ async def run_confluence_indexing(
end_date=end_date,
indexing_function=index_confluence_pages,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1822,6 +1940,7 @@ async def run_clickup_indexing(
end_date=end_date,
indexing_function=index_clickup_tasks,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1872,6 +1991,7 @@ async def run_airtable_indexing(
end_date=end_date,
indexing_function=index_airtable_records,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1924,6 +2044,7 @@ async def run_google_calendar_indexing(
end_date=end_date,
indexing_function=index_google_calendar_events,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -1998,6 +2119,7 @@ async def run_google_gmail_indexing(
end_date=end_date,
indexing_function=gmail_indexing_wrapper,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -2206,6 +2328,7 @@ async def run_luma_indexing(
end_date=end_date,
indexing_function=index_luma_events,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -2257,6 +2380,7 @@ async def run_elasticsearch_indexing(
end_date=end_date,
indexing_function=index_elasticsearch_documents,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -2306,6 +2430,7 @@ async def run_web_page_indexing(
end_date=end_date,
indexing_function=index_crawled_urls,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -2360,6 +2485,7 @@ async def run_bookstack_indexing(
end_date=end_date,
indexing_function=index_bookstack_pages,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -2412,6 +2538,7 @@ async def run_obsidian_indexing(
end_date=end_date,
indexing_function=index_obsidian_vault,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)
@ -2465,6 +2592,7 @@ async def run_composio_indexing(
end_date=end_date,
indexing_function=index_composio_connector,
update_timestamp_func=_update_connector_timestamp_by_id,
supports_heartbeat_callback=True,
)

View file

@ -6,6 +6,7 @@ Handles OAuth 2.0 authentication flow for Slack connector.
import logging
from datetime import UTC, datetime, timedelta
from typing import Any
from uuid import UUID
import httpx
@ -14,6 +15,7 @@ from fastapi.responses import RedirectResponse
from pydantic import ValidationError
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.config import config
from app.db import (
@ -418,6 +420,19 @@ async def refresh_slack_token(
error_detail = error_json.get("error", error_detail)
except Exception:
pass
# Check if this is a token expiration/revocation error
error_lower = error_detail.lower()
if (
"invalid_grant" in error_lower
or "invalid_auth" in error_lower
or "token_revoked" in error_lower
or "expired" in error_lower
or "revoked" in error_lower
):
raise HTTPException(
status_code=401,
detail="Slack authentication failed. Please re-authenticate.",
)
raise HTTPException(
status_code=400, detail=f"Token refresh failed: {error_detail}"
)
@ -427,6 +442,20 @@ async def refresh_slack_token(
# Slack OAuth v2 returns success status in the JSON
if not token_json.get("ok", False):
error_msg = token_json.get("error", "Unknown error")
# Check if this is a token expiration/revocation error
error_lower = error_msg.lower()
if (
"invalid_grant" in error_lower
or "invalid_auth" in error_lower
or "invalid_refresh_token" in error_lower
or "token_revoked" in error_lower
or "expired" in error_lower
or "revoked" in error_lower
):
raise HTTPException(
status_code=401,
detail="Slack authentication failed. Please re-authenticate.",
)
raise HTTPException(
status_code=400, detail=f"Slack OAuth refresh error: {error_msg}"
)
@ -490,3 +519,88 @@ async def refresh_slack_token(
raise HTTPException(
status_code=500, detail=f"Failed to refresh Slack token: {e!s}"
) from e
@router.get("/slack/connector/{connector_id}/channels")
async def get_slack_channels(
connector_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
) -> list[dict[str, Any]]:
"""
Get list of Slack channels with bot membership status.
This endpoint fetches all channels the bot can see and indicates
whether the bot is a member of each channel (required for accessing messages).
Args:
connector_id: The Slack connector ID
session: Database session
user: Current authenticated user
Returns:
List of channels with id, name, is_private, and is_member fields
"""
try:
# Get the connector and verify ownership
result = await session.execute(
select(SearchSourceConnector).where(
SearchSourceConnector.id == connector_id,
SearchSourceConnector.user_id == user.id,
SearchSourceConnector.connector_type
== SearchSourceConnectorType.SLACK_CONNECTOR,
)
)
connector = result.scalar_one_or_none()
if not connector:
raise HTTPException(
status_code=404,
detail="Slack connector not found or access denied",
)
# Get credentials and decrypt bot token
credentials = SlackAuthCredentialsBase.from_dict(connector.config)
token_encryption = get_token_encryption()
is_encrypted = connector.config.get("_token_encrypted", False)
bot_token = credentials.bot_token
if is_encrypted and bot_token:
try:
bot_token = token_encryption.decrypt_token(bot_token)
except Exception as e:
logger.error(f"Failed to decrypt bot token: {e!s}")
raise HTTPException(
status_code=500, detail="Failed to decrypt stored bot token"
) from e
if not bot_token:
raise HTTPException(
status_code=400,
detail="No bot token available. Please re-authenticate.",
)
# Import SlackHistory here to avoid circular imports
from app.connectors.slack_history import SlackHistory
# Create Slack client with direct token (simple pattern for quick operations)
slack_client = SlackHistory(token=bot_token)
channels = await slack_client.get_all_channels(include_private=True)
logger.info(
f"Fetched {len(channels)} channels for Slack connector {connector_id}"
)
return channels
except HTTPException:
raise
except Exception as e:
logger.error(
f"Failed to get Slack channels for connector {connector_id}: {e!s}",
exc_info=True,
)
raise HTTPException(
status_code=500, detail=f"Failed to get Slack channels: {e!s}"
) from e

View file

@ -420,11 +420,24 @@ async def refresh_teams_token(
if token_response.status_code != 200:
error_detail = token_response.text
error_code = ""
try:
error_json = token_response.json()
error_detail = error_json.get("error_description", error_detail)
error_code = error_json.get("error", "")
except Exception:
pass
# Check if this is a token expiration/revocation error
error_lower = (error_detail + error_code).lower()
if (
"invalid_grant" in error_lower
or "expired" in error_lower
or "revoked" in error_lower
):
raise HTTPException(
status_code=401,
detail="Microsoft Teams authentication failed. Please re-authenticate.",
)
raise HTTPException(
status_code=400, detail=f"Token refresh failed: {error_detail}"
)

View file

@ -95,9 +95,6 @@ class NewChatThreadRead(NewChatThreadBase, IDModel):
search_space_id: int
visibility: ChatVisibility
created_by_id: UUID | None = None
public_share_enabled: bool = False
public_share_token: str | None = None
clone_pending: bool = False
created_at: datetime
updated_at: datetime
@ -137,7 +134,6 @@ class ThreadListItem(BaseModel):
visibility: ChatVisibility
created_by_id: UUID | None = None
is_own_thread: bool = False
public_share_enabled: bool = False
created_at: datetime = Field(alias="createdAt")
updated_at: datetime = Field(alias="updatedAt")
@ -211,22 +207,33 @@ class RegenerateRequest(BaseModel):
# =============================================================================
# Public Sharing Schemas
# Public Chat Snapshot Schemas
# =============================================================================
class PublicShareToggleRequest(BaseModel):
"""Request to enable/disable public sharing for a thread."""
class SnapshotCreateResponse(BaseModel):
"""Response after creating a public snapshot."""
enabled: bool
snapshot_id: int
share_token: str
public_url: str
is_new: bool # False if existing snapshot returned (same content)
class PublicShareToggleResponse(BaseModel):
"""Response after toggling public sharing."""
class SnapshotInfo(BaseModel):
"""Info about a single snapshot."""
enabled: bool
public_url: str | None = None
share_token: str | None = None
id: int
share_token: str
public_url: str
created_at: datetime
message_count: int
class SnapshotListResponse(BaseModel):
"""List of snapshots for a thread."""
snapshots: list[SnapshotInfo]
# =============================================================================
@ -256,12 +263,8 @@ class PublicChatResponse(BaseModel):
messages: list[PublicChatMessage]
class CloneInitResponse(BaseModel):
class CloneResponse(BaseModel):
"""Response after cloning a public snapshot."""
thread_id: int
search_space_id: int
share_token: str
class CompleteCloneResponse(BaseModel):
status: str
message_count: int

View file

@ -32,7 +32,7 @@ PROVIDER_MAP = {
"GROQ": "groq",
"COHERE": "cohere",
"GOOGLE": "gemini",
"OLLAMA": "ollama",
"OLLAMA": "ollama_chat",
"MISTRAL": "mistral",
"AZURE_OPENAI": "azure",
"OPENROUTER": "openrouter",

View file

@ -94,7 +94,7 @@ async def validate_llm_config(
"GROQ": "groq",
"COHERE": "cohere",
"GOOGLE": "gemini",
"OLLAMA": "ollama",
"OLLAMA": "ollama_chat",
"MISTRAL": "mistral",
"AZURE_OPENAI": "azure",
"OPENROUTER": "openrouter",
@ -241,7 +241,7 @@ async def get_search_space_llm_instance(
"GROQ": "groq",
"COHERE": "cohere",
"GOOGLE": "gemini",
"OLLAMA": "ollama",
"OLLAMA": "ollama_chat",
"MISTRAL": "mistral",
"AZURE_OPENAI": "azure",
"OPENROUTER": "openrouter",
@ -311,7 +311,7 @@ async def get_search_space_llm_instance(
"GROQ": "groq",
"COHERE": "cohere",
"GOOGLE": "gemini",
"OLLAMA": "ollama",
"OLLAMA": "ollama_chat",
"MISTRAL": "mistral",
"AZURE_OPENAI": "azure",
"OPENROUTER": "openrouter",

View file

@ -1,17 +1,35 @@
"""
Service layer for public chat sharing and cloning.
Service layer for public chat sharing via immutable snapshots.
Key concepts:
- Snapshots are frozen copies of a chat at a specific point in time
- Content hash enables deduplication (same content = same URL)
- Podcasts are embedded in snapshot_data for self-contained public views
- Single-phase clone reads directly from snapshot_data
"""
import hashlib
import json
import re
import secrets
from datetime import UTC, datetime
from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import select
from sqlalchemy import delete, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.db import NewChatThread, User
from app.db import (
ChatVisibility,
NewChatMessage,
NewChatThread,
Podcast,
PodcastStatus,
PublicChatSnapshot,
SearchSpaceMembership,
User,
)
UI_TOOLS = {
"display_image",
@ -100,20 +118,242 @@ async def get_author_display(
return user_cache[author_id]
async def toggle_public_share(
# =============================================================================
# Content Hashing
# =============================================================================
def compute_content_hash(messages: list[dict]) -> str:
"""
Compute SHA-256 hash of message content for deduplication.
The hash is based on message IDs and content, ensuring that:
- Same messages = same hash = same URL (deduplication)
- Any change = different hash = new URL
"""
# Sort by message ID to ensure consistent ordering
sorted_messages = sorted(messages, key=lambda m: m.get("id", 0))
# Create normalized representation
normalized = []
for msg in sorted_messages:
normalized.append(
{
"id": msg.get("id"),
"role": msg.get("role"),
"content": msg.get("content"),
}
)
content_str = json.dumps(normalized, sort_keys=True, separators=(",", ":"))
return hashlib.sha256(content_str.encode()).hexdigest()
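A quick sanity check of the deduplication property; the message dicts below are illustrative, not taken from this change:
# Illustrative only -- ordering does not affect the hash, content changes do.
msgs = [
    {"id": 2, "role": "assistant", "content": "Hi there"},
    {"id": 1, "role": "user", "content": "Hello"},
]
assert compute_content_hash(msgs) == compute_content_hash(list(reversed(msgs)))
assert compute_content_hash(msgs) != compute_content_hash(
    [*msgs, {"id": 3, "role": "user", "content": "One more message"}]
)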
# =============================================================================
# Snapshot Creation
# =============================================================================
async def create_snapshot(
session: AsyncSession,
thread_id: int,
enabled: bool,
user: User,
base_url: str,
) -> dict:
"""
Enable or disable public sharing for a thread.
Create a public snapshot of a chat thread.
Only the thread owner can toggle public sharing.
When enabling, generates a new token if one doesn't exist.
When disabling, keeps the token for potential re-enable.
Returns existing snapshot if content unchanged (same hash).
Returns new snapshot with unique URL if content changed.
"""
result = await session.execute(
select(NewChatThread)
.options(selectinload(NewChatThread.messages))
.filter(NewChatThread.id == thread_id)
)
thread = result.scalars().first()
if not thread:
raise HTTPException(status_code=404, detail="Thread not found")
if thread.created_by_id != user.id:
raise HTTPException(
status_code=403,
detail="Only the creator of this chat can create public snapshots",
)
# Build snapshot data
user_cache: dict[UUID, dict] = {}
messages_data = []
message_ids = []
podcasts_data = []
podcast_ids_seen: set[int] = set()
for msg in sorted(thread.messages, key=lambda m: m.created_at):
author = await get_author_display(session, msg.author_id, user_cache)
sanitized_content = sanitize_content_for_public(msg.content)
# Extract podcast references and update status to "ready" for completed podcasts
if isinstance(sanitized_content, list):
for part in sanitized_content:
if (
isinstance(part, dict)
and part.get("type") == "tool-call"
and part.get("toolName") == "generate_podcast"
):
result_data = part.get("result", {})
podcast_id = result_data.get("podcast_id")
if podcast_id and podcast_id not in podcast_ids_seen:
podcast_info = await _get_podcast_for_snapshot(
session, podcast_id
)
if podcast_info:
podcasts_data.append(podcast_info)
podcast_ids_seen.add(podcast_id)
# Update status to "ready" so frontend renders PodcastPlayer
part["result"] = {**result_data, "status": "ready"}
messages_data.append(
{
"id": msg.id,
"role": msg.role.value if hasattr(msg.role, "value") else str(msg.role),
"content": sanitized_content,
"author": author,
"author_id": str(msg.author_id) if msg.author_id else None,
"created_at": msg.created_at.isoformat() if msg.created_at else None,
}
)
message_ids.append(msg.id)
if not messages_data:
raise HTTPException(status_code=400, detail="Cannot share an empty chat")
# Compute content hash for deduplication
content_hash = compute_content_hash(messages_data)
# Check if identical snapshot already exists
existing_result = await session.execute(
select(PublicChatSnapshot).filter(
PublicChatSnapshot.thread_id == thread_id,
PublicChatSnapshot.content_hash == content_hash,
)
)
existing = existing_result.scalars().first()
if existing:
# Return existing snapshot URL
return {
"snapshot_id": existing.id,
"share_token": existing.share_token,
"public_url": f"{base_url}/public/{existing.share_token}",
"is_new": False,
}
# Get thread author info
thread_author = await get_author_display(session, thread.created_by_id, user_cache)
# Create snapshot data
snapshot_data = {
"title": thread.title,
"snapshot_at": datetime.now(UTC).isoformat(),
"author": thread_author,
"messages": messages_data,
"podcasts": podcasts_data,
}
# Create new snapshot
share_token = secrets.token_urlsafe(48)
snapshot = PublicChatSnapshot(
thread_id=thread_id,
share_token=share_token,
content_hash=content_hash,
snapshot_data=snapshot_data,
message_ids=message_ids,
created_by_user_id=user.id,
)
session.add(snapshot)
await session.commit()
await session.refresh(snapshot)
return {
"snapshot_id": snapshot.id,
"share_token": snapshot.share_token,
"public_url": f"{base_url}/public/{snapshot.share_token}",
"is_new": True,
}
async def _get_podcast_for_snapshot(
session: AsyncSession,
podcast_id: int,
) -> dict | None:
"""Get podcast info for embedding in snapshot_data."""
result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
podcast = result.scalars().first()
if not podcast or podcast.status != PodcastStatus.READY:
return None
return {
"original_id": podcast.id,
"title": podcast.title,
"transcript": podcast.podcast_transcript,
"file_path": podcast.file_location,
}
# =============================================================================
# Snapshot Retrieval
# =============================================================================
async def get_snapshot_by_token(
session: AsyncSession,
share_token: str,
) -> PublicChatSnapshot | None:
"""Get a snapshot by its share token."""
result = await session.execute(
select(PublicChatSnapshot).filter(
PublicChatSnapshot.share_token == share_token
)
)
return result.scalars().first()
async def get_public_chat(
session: AsyncSession,
share_token: str,
) -> dict:
"""
Get public chat data from a snapshot.
Returns sanitized content suitable for public viewing.
"""
snapshot = await get_snapshot_by_token(session, share_token)
if not snapshot:
raise HTTPException(status_code=404, detail="Not found")
data = snapshot.snapshot_data
return {
"thread": {
"title": data.get("title", "Untitled"),
"created_at": data.get("snapshot_at"),
},
"messages": data.get("messages", []),
}
async def list_snapshots_for_thread(
session: AsyncSession,
thread_id: int,
user: User,
base_url: str,
) -> list[dict]:
"""List all public snapshots for a thread."""
# Verify ownership
result = await session.execute(
select(NewChatThread).filter(NewChatThread.id == thread_id)
)
@ -125,92 +365,101 @@ async def toggle_public_share(
if thread.created_by_id != user.id:
raise HTTPException(
status_code=403,
detail="Only the creator of this chat can manage public sharing",
detail="Only the creator can view snapshots",
)
if enabled and not thread.public_share_token:
thread.public_share_token = secrets.token_urlsafe(48)
# Get snapshots
result = await session.execute(
select(PublicChatSnapshot)
.filter(PublicChatSnapshot.thread_id == thread_id)
.order_by(PublicChatSnapshot.created_at.desc())
)
snapshots = result.scalars().all()
thread.public_share_enabled = enabled
await session.commit()
await session.refresh(thread)
if enabled:
return {
"enabled": True,
"public_url": f"{base_url}/public/{thread.public_share_token}",
"share_token": thread.public_share_token,
return [
{
"id": s.id,
"share_token": s.share_token,
"public_url": f"{base_url}/public/{s.share_token}",
"created_at": s.created_at.isoformat() if s.created_at else None,
"message_count": len(s.message_ids) if s.message_ids else 0,
}
return {
"enabled": False,
"public_url": None,
"share_token": None,
}
for s in snapshots
]
async def get_public_chat(
# =============================================================================
# Snapshot Deletion
# =============================================================================
async def delete_snapshot(
session: AsyncSession,
share_token: str,
) -> dict:
"""
Get a public chat by share token.
Returns sanitized content suitable for public viewing.
"""
thread_id: int,
snapshot_id: int,
user: User,
) -> bool:
"""Delete a specific snapshot. Only thread owner can delete."""
# Get snapshot with thread
result = await session.execute(
select(NewChatThread)
.options(selectinload(NewChatThread.messages))
select(PublicChatSnapshot)
.options(selectinload(PublicChatSnapshot.thread))
.filter(
NewChatThread.public_share_token == share_token,
NewChatThread.public_share_enabled.is_(True),
PublicChatSnapshot.id == snapshot_id,
PublicChatSnapshot.thread_id == thread_id,
)
)
thread = result.scalars().first()
snapshot = result.scalars().first()
if not thread:
raise HTTPException(status_code=404, detail="Not found")
if not snapshot:
raise HTTPException(status_code=404, detail="Snapshot not found")
user_cache: dict[UUID, dict] = {}
messages = []
for msg in sorted(thread.messages, key=lambda m: m.created_at):
author = await get_author_display(session, msg.author_id, user_cache)
sanitized_content = sanitize_content_for_public(msg.content)
messages.append(
{
"role": msg.role,
"content": sanitized_content,
"author": author,
"created_at": msg.created_at,
}
if snapshot.thread.created_by_id != user.id:
raise HTTPException(
status_code=403,
detail="Only the creator can delete snapshots",
)
return {
"thread": {
"title": thread.title,
"created_at": thread.created_at,
},
"messages": messages,
}
await session.delete(snapshot)
await session.commit()
return True
async def get_thread_by_share_token(
session: AsyncSession,
share_token: str,
) -> NewChatThread | None:
"""Get a thread by its public share token if sharing is enabled."""
result = await session.execute(
select(NewChatThread)
.options(selectinload(NewChatThread.messages))
.filter(
NewChatThread.public_share_token == share_token,
NewChatThread.public_share_enabled.is_(True),
async def delete_affected_snapshots(
session: AsyncSession, # noqa: ARG001 - kept for API compatibility
thread_id: int,
message_ids: list[int],
) -> int:
"""
Delete snapshots that contain any of the given message IDs.
Called when messages are edited/deleted/regenerated.
Uses an independent session so it works reliably during streaming-response cleanup.
"""
if not message_ids:
return 0
from sqlalchemy.dialects.postgresql import array
from app.db import async_session_maker
async with async_session_maker() as independent_session:
result = await independent_session.execute(
delete(PublicChatSnapshot)
.where(PublicChatSnapshot.thread_id == thread_id)
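# PostgreSQL's && (array overlap) operator: matches snapshots whose message_ids
# share at least one element with the given ids.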
.where(PublicChatSnapshot.message_ids.op("&&")(array(message_ids)))
.returning(PublicChatSnapshot.id)
)
)
return result.scalars().first()
deleted_ids = result.scalars().all()
await independent_session.commit()
return len(deleted_ids)
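A hedged sketch of a caller; `thread` and `edited_message` are assumed to exist in the calling context and are not part of this change:
# Illustrative only: after a message is edited, drop any public snapshots that froze it.
invalidated = await delete_affected_snapshots(
    session, thread_id=thread.id, message_ids=[edited_message.id]
)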
# =============================================================================
# Cloning from Snapshot
# =============================================================================
async def get_user_default_search_space(
@ -222,8 +471,6 @@ async def get_user_default_search_space(
Returns the first search space where user is owner, or None if not found.
"""
from app.db import SearchSpaceMembership
result = await session.execute(
select(SearchSpaceMembership)
.filter(
@ -240,140 +487,153 @@ async def get_user_default_search_space(
return None
async def complete_clone_content(
async def clone_from_snapshot(
session: AsyncSession,
target_thread: NewChatThread,
source_thread_id: int,
target_search_space_id: int,
) -> int:
share_token: str,
user: User,
) -> dict:
"""
Copy messages and podcasts from source thread to target thread.
Sets clone_pending=False and needs_history_bootstrap=True when done.
Returns the number of messages copied.
Creates a new thread and copies messages from snapshot_data.
When encountering generate_podcast tool-calls, creates cloned podcast records
and updates the podcast_id references inline.
Returns the new thread info.
"""
from app.db import NewChatMessage
import copy
result = await session.execute(
select(NewChatThread)
.options(selectinload(NewChatThread.messages))
.filter(NewChatThread.id == source_thread_id)
snapshot = await get_snapshot_by_token(session, share_token)
if not snapshot:
raise HTTPException(
status_code=404, detail="Chat not found or no longer public"
)
target_search_space_id = await get_user_default_search_space(session, user.id)
if target_search_space_id is None:
raise HTTPException(status_code=400, detail="No search space found for user")
data = snapshot.snapshot_data
messages_data = data.get("messages", [])
podcasts_lookup = {p.get("original_id"): p for p in data.get("podcasts", [])}
new_thread = NewChatThread(
title=data.get("title", "Cloned Chat"),
archived=False,
visibility=ChatVisibility.PRIVATE,
search_space_id=target_search_space_id,
created_by_id=user.id,
cloned_from_thread_id=snapshot.thread_id,
cloned_from_snapshot_id=snapshot.id,
cloned_at=datetime.now(UTC),
needs_history_bootstrap=True,
)
source_thread = result.scalars().first()
session.add(new_thread)
await session.flush()
if not source_thread:
raise ValueError("Source thread not found")
podcast_id_mapping: dict[int, int] = {}
podcast_id_map: dict[int, int] = {}
message_count = 0
# Check which authors from snapshot still exist in DB
author_ids_from_snapshot: set[UUID] = set()
for msg_data in messages_data:
if author_str := msg_data.get("author_id"):
try:
author_ids_from_snapshot.add(UUID(author_str))
except (ValueError, TypeError):
pass
for msg in sorted(source_thread.messages, key=lambda m: m.created_at):
new_content = sanitize_content_for_public(msg.content)
existing_authors: set[UUID] = set()
if author_ids_from_snapshot:
result = await session.execute(
select(User.id).where(User.id.in_(author_ids_from_snapshot))
)
existing_authors = {row[0] for row in result.fetchall()}
if isinstance(new_content, list):
for part in new_content:
for msg_data in messages_data:
role = msg_data.get("role", "user")
# Use the original author if they still exist, otherwise None
author_id = None
if author_str := msg_data.get("author_id"):
try:
parsed_id = UUID(author_str)
if parsed_id in existing_authors:
author_id = parsed_id
except (ValueError, TypeError):
pass
content = copy.deepcopy(msg_data.get("content", []))
if isinstance(content, list):
for part in content:
if (
isinstance(part, dict)
and part.get("type") == "tool-call"
and part.get("toolName") == "generate_podcast"
):
result_data = part.get("result", {})
old_podcast_id = result_data.get("podcast_id")
if old_podcast_id and old_podcast_id not in podcast_id_map:
new_podcast_id = await _clone_podcast(
session,
old_podcast_id,
target_search_space_id,
target_thread.id,
)
if new_podcast_id:
podcast_id_map[old_podcast_id] = new_podcast_id
result = part.get("result", {})
old_podcast_id = result.get("podcast_id")
if old_podcast_id and old_podcast_id in podcast_id_map:
result_data["podcast_id"] = podcast_id_map[old_podcast_id]
elif old_podcast_id:
# Podcast couldn't be cloned (not ready), remove reference
result_data.pop("podcast_id", None)
if old_podcast_id and old_podcast_id not in podcast_id_mapping:
podcast_info = podcasts_lookup.get(old_podcast_id)
if podcast_info:
new_podcast = Podcast(
title=podcast_info.get("title", "Cloned Podcast"),
podcast_transcript=podcast_info.get("transcript"),
file_location=podcast_info.get("file_path"),
status=PodcastStatus.READY,
search_space_id=target_search_space_id,
thread_id=new_thread.id,
)
session.add(new_podcast)
await session.flush()
podcast_id_mapping[old_podcast_id] = new_podcast.id
if old_podcast_id and old_podcast_id in podcast_id_mapping:
part["result"] = {
**result,
"podcast_id": podcast_id_mapping[old_podcast_id],
}
new_message = NewChatMessage(
thread_id=target_thread.id,
role=msg.role,
content=new_content,
author_id=msg.author_id,
created_at=msg.created_at,
thread_id=new_thread.id,
role=role,
content=content,
author_id=author_id,
)
session.add(new_message)
message_count += 1
target_thread.clone_pending = False
target_thread.needs_history_bootstrap = True
await session.commit()
await session.refresh(new_thread)
return message_count
return {
"thread_id": new_thread.id,
"search_space_id": target_search_space_id,
}
async def _clone_podcast(
async def get_snapshot_podcast(
session: AsyncSession,
share_token: str,
podcast_id: int,
target_search_space_id: int,
target_thread_id: int,
) -> int | None:
"""Clone a podcast record and its audio file. Only clones ready podcasts."""
import shutil
import uuid
from pathlib import Path
) -> dict | None:
"""
Get podcast info from a snapshot by original podcast ID.
from app.db import Podcast, PodcastStatus
Used for streaming podcast audio from public view.
Looks up the podcast by its original_id in the snapshot's podcasts array.
"""
snapshot = await get_snapshot_by_token(session, share_token)
result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
original = result.scalars().first()
if not original or original.status != PodcastStatus.READY:
if not snapshot:
return None
new_file_path = None
if original.file_location:
original_path = Path(original.file_location)
if original_path.exists():
new_filename = f"{uuid.uuid4()}_podcast.mp3"
new_dir = Path("podcasts")
new_dir.mkdir(parents=True, exist_ok=True)
new_file_path = str(new_dir / new_filename)
shutil.copy2(original.file_location, new_file_path)
podcasts = snapshot.snapshot_data.get("podcasts", [])
new_podcast = Podcast(
title=original.title,
podcast_transcript=original.podcast_transcript,
file_location=new_file_path,
status=PodcastStatus.READY,
search_space_id=target_search_space_id,
thread_id=target_thread_id,
)
session.add(new_podcast)
await session.flush()
# Find podcast by original_id
for podcast in podcasts:
if podcast.get("original_id") == podcast_id:
return podcast
return new_podcast.id
async def is_podcast_publicly_accessible(
session: AsyncSession,
podcast_id: int,
) -> bool:
"""
Check if a podcast belongs to a publicly shared thread.
Uses the thread_id foreign key for efficient lookup.
"""
from app.db import Podcast
result = await session.execute(
select(Podcast)
.options(selectinload(Podcast.thread))
.filter(Podcast.id == podcast_id)
)
podcast = result.scalars().first()
if not podcast or not podcast.thread:
return False
return podcast.thread.public_share_enabled
return None

View file

@ -0,0 +1,164 @@
"""Celery task to detect and mark stale connector indexing notifications as failed.
This task runs periodically (every 5 minutes by default) to find notifications
that are stuck in "in_progress" status but don't have an active Redis heartbeat key.
These are marked as "failed" to prevent the frontend from showing a perpetual "syncing" state.
Detection mechanism:
- Active indexing tasks set a Redis key with TTL (2 minutes) as a heartbeat
- If the task crashes, the Redis key expires automatically
- This cleanup task checks for in-progress notifications without a Redis heartbeat key
- Such notifications are marked as failed with O(1) batch UPDATE
"""
import json
import logging
import os
from datetime import UTC, datetime
import redis
from sqlalchemy import and_, text
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
from sqlalchemy.future import select
from sqlalchemy.pool import NullPool
from app.celery_app import celery_app
from app.config import config
from app.db import Notification
logger = logging.getLogger(__name__)
# Redis client for checking heartbeats
_redis_client: redis.Redis | None = None
def get_redis_client() -> redis.Redis:
"""Get or create Redis client for heartbeat checking."""
global _redis_client
if _redis_client is None:
redis_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
_redis_client = redis.from_url(redis_url, decode_responses=True)
return _redis_client
def _get_heartbeat_key(notification_id: int) -> str:
"""Generate Redis key for notification heartbeat."""
return f"indexing:heartbeat:{notification_id}"
def get_celery_session_maker():
"""Create async session maker for Celery tasks."""
engine = create_async_engine(
config.DATABASE_URL,
poolclass=NullPool,
echo=False,
)
return async_sessionmaker(engine, expire_on_commit=False)
@celery_app.task(name="cleanup_stale_indexing_notifications")
def cleanup_stale_indexing_notifications_task():
"""
Check for stale connector indexing notifications and mark them as failed.
This task finds notifications that:
- Have type = 'connector_indexing'
- Have metadata.status = 'in_progress'
- Do NOT have a corresponding Redis heartbeat key (meaning task crashed)
And marks them as failed with O(1) batch UPDATE.
"""
import asyncio
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
loop.run_until_complete(_cleanup_stale_notifications())
finally:
loop.close()
async def _cleanup_stale_notifications():
"""Find and mark stale connector indexing notifications as failed.
Uses Redis TTL-based detection:
1. Find all in-progress notifications
2. Check which ones are missing their Redis heartbeat key
3. Mark those as failed with O(1) batch UPDATE using JSONB || operator
"""
async with get_celery_session_maker()() as session:
try:
# Find all in-progress connector indexing notifications
result = await session.execute(
select(Notification.id).where(
and_(
Notification.type == "connector_indexing",
Notification.notification_metadata["status"].astext
== "in_progress",
)
)
)
in_progress_ids = [row[0] for row in result.fetchall()]
if not in_progress_ids:
logger.debug("No in-progress connector indexing notifications found")
return
# Check which ones are missing heartbeat keys in Redis
redis_client = get_redis_client()
stale_notification_ids = []
for notification_id in in_progress_ids:
heartbeat_key = _get_heartbeat_key(notification_id)
if not redis_client.exists(heartbeat_key):
stale_notification_ids.append(notification_id)
if not stale_notification_ids:
logger.debug(
f"All {len(in_progress_ids)} in-progress notifications have active Redis heartbeats"
)
return
logger.warning(
f"Found {len(stale_notification_ids)} stale connector indexing notifications "
f"(no Redis heartbeat key): {stale_notification_ids}"
)
# O(1) Batch UPDATE using JSONB || operator
# This merges the update data into existing notification_metadata
# Also updates title and message for proper UI display
error_message = (
"Something went wrong while syncing your content. Please retry."
)
update_data = {
"status": "failed",
"completed_at": datetime.now(UTC).isoformat(),
"error_message": error_message,
"sync_stage": "failed",
}
await session.execute(
text("""
UPDATE notifications
SET metadata = metadata || CAST(:update_json AS jsonb),
title = 'Failed: ' || COALESCE(metadata->>'connector_name', 'Connector'),
message = :display_message
WHERE id = ANY(:ids)
"""),
{
"update_json": json.dumps(update_data),
"display_message": f"{error_message}",
"ids": stale_notification_ids,
},
)
await session.commit()
logger.info(
f"Successfully marked {len(stale_notification_ids)} stale notifications as failed (batch UPDATE)"
)
except Exception as e:
logger.error(f"Error cleaning up stale notifications: {e!s}", exc_info=True)
await session.rollback()

View file

@ -9,6 +9,7 @@ to avoid circular import issues with the connector_indexers package.
"""
import logging
from collections.abc import Awaitable, Callable
from importlib import import_module
from sqlalchemy.exc import SQLAlchemyError
@ -22,6 +23,9 @@ from app.db import (
from app.services.composio_service import INDEXABLE_TOOLKITS, TOOLKIT_TO_INDEXER
from app.services.task_logging_service import TaskLoggingService
# Type alias for heartbeat callback function
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Set up logging
logger = logging.getLogger(__name__)
@ -86,6 +90,7 @@ async def index_composio_connector(
end_date: str | None = None,
update_last_indexed: bool = True,
max_items: int = 1000,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int, str | None]:
"""
Index content from a Composio connector.
@ -102,6 +107,7 @@ async def index_composio_connector(
end_date: End date for filtering (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp
max_items: Maximum number of items to fetch
on_heartbeat_callback: Optional callback to report progress for heartbeat updates
Returns:
Tuple of (number_of_indexed_items, number_of_skipped_items, error_message or None)
@ -180,6 +186,7 @@ async def index_composio_connector(
"log_entry": log_entry,
"update_last_indexed": update_last_indexed,
"max_items": max_items,
"on_heartbeat_callback": on_heartbeat_callback,
}
# Add date params for toolkits that support them

View file

@ -2,6 +2,9 @@
Airtable connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
@ -27,6 +30,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_airtable_records(
session: AsyncSession,
@ -37,6 +46,7 @@ async def index_airtable_records(
end_date: str | None = None,
max_records: int = 2500,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Airtable records for a given connector.
@ -50,6 +60,7 @@ async def index_airtable_records(
end_date: End date for filtering records (YYYY-MM-DD)
max_records: Maximum number of records to fetch per table
update_last_indexed: Whether to update the last_indexed_at timestamp
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple of (number_of_documents_processed, error_message)
@ -127,8 +138,20 @@ async def index_airtable_records(
logger.info(f"Found {len(bases)} Airtable bases to process")
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
total_documents_indexed = 0
# Process each base
for base in bases:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time)
>= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(total_documents_indexed)
last_heartbeat_time = time.time()
base_id = base.get("id")
base_name = base.get("name", "Unknown Base")
@ -204,6 +227,15 @@ async def index_airtable_records(
documents_skipped = 0
# Process each record
for record in records:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time)
>= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(total_documents_indexed)
last_heartbeat_time = time.time()
try:
# Generate markdown content
markdown_content = (

View file

@ -2,6 +2,8 @@
BookStack connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError
@ -29,6 +31,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_bookstack_pages(
session: AsyncSession,
@ -38,6 +46,7 @@ async def index_bookstack_pages(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index BookStack pages.
@ -50,6 +59,7 @@ async def index_bookstack_pages(
start_date: Start date for indexing (YYYY-MM-DD format)
end_date: End date for indexing (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -179,7 +189,17 @@ async def index_bookstack_pages(
skipped_pages = []
documents_skipped = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
for page in pages:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
page_id = page.get("id")
page_name = page.get("name", "")

View file

@ -3,6 +3,8 @@ ClickUp connector indexer.
"""
import contextlib
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError
@ -29,6 +31,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_clickup_tasks(
session: AsyncSession,
@ -38,6 +46,7 @@ async def index_clickup_tasks(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index tasks from ClickUp workspace.
@ -50,6 +59,7 @@ async def index_clickup_tasks(
start_date: Start date for filtering tasks (YYYY-MM-DD format)
end_date: End date for filtering tasks (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple of (number of indexed tasks, error message if any)
@ -132,6 +142,9 @@ async def index_clickup_tasks(
documents_indexed = 0
documents_skipped = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
# Iterate workspaces and fetch tasks
for workspace in workspaces:
workspace_id = workspace.get("id")
@ -170,6 +183,15 @@ async def index_clickup_tasks(
)
for task in tasks:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time)
>= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
task_id = task.get("id")
task_name = task.get("name", "Untitled Task")

View file

@ -3,6 +3,8 @@ Confluence connector indexer.
"""
import contextlib
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError
@ -30,6 +32,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_confluence_pages(
session: AsyncSession,
@ -39,6 +47,7 @@ async def index_confluence_pages(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Confluence pages and comments.
@ -51,6 +60,7 @@ async def index_confluence_pages(
start_date: Start date for indexing (YYYY-MM-DD format)
end_date: End date for indexing (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -175,7 +185,17 @@ async def index_confluence_pages(
skipped_pages = []
documents_skipped = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
for page in pages:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
page_id = page.get("id")
page_title = page.get("title", "")

View file

@ -3,6 +3,8 @@ Discord connector indexer.
"""
import asyncio
import time
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime, timedelta
from sqlalchemy.exc import SQLAlchemyError
@ -28,6 +30,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds - update notification every 30 seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_discord_messages(
session: AsyncSession,
@ -37,6 +45,7 @@ async def index_discord_messages(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Discord messages from all accessible channels.
@ -49,6 +58,8 @@ async def index_discord_messages(
start_date: Start date for indexing (YYYY-MM-DD format)
end_date: End date for indexing (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Called periodically with (indexed_count) to prevent task appearing stuck.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -281,6 +292,9 @@ async def index_discord_messages(
documents_skipped = 0
skipped_channels: list[str] = []
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
# Process each guild and channel
await task_logger.log_task_progress(
log_entry,
@ -290,6 +304,14 @@ async def index_discord_messages(
try:
for guild in guilds:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time)
>= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
guild_id = guild["id"]
guild_name = guild["name"]
logger.info(f"Processing guild: {guild_name} ({guild_id})")

View file

@ -4,6 +4,8 @@ Elasticsearch indexer for SurfSense
import json
import logging
import time
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from typing import Any
@ -25,6 +27,12 @@ from .base import (
get_current_timestamp,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
logger = logging.getLogger(__name__)
@ -36,6 +44,7 @@ async def index_elasticsearch_documents(
start_date: str,
end_date: str,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index documents from Elasticsearch into SurfSense
@ -48,6 +57,7 @@ async def index_elasticsearch_documents(
start_date: Start date for indexing (not used for Elasticsearch, kept for compatibility)
end_date: End date for indexing (not used for Elasticsearch, kept for compatibility)
update_last_indexed: Whether to update the last indexed timestamp
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple of (number of documents processed, error message if any)
@ -155,6 +165,9 @@ async def index_elasticsearch_documents(
documents_processed = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
try:
await task_logger.log_task_progress(
log_entry,
@ -172,6 +185,15 @@ async def index_elasticsearch_documents(
size=min(max_documents, 100), # Scroll in batches
fields=config.get("ELASTICSEARCH_FIELDS"),
):
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time)
>= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_processed)
last_heartbeat_time = time.time()
if documents_processed >= max_documents:
break

View file

@ -5,6 +5,8 @@ This indexer processes entire repository digests in one pass, dramatically
reducing LLM API calls compared to the previous file-by-file approach.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from sqlalchemy.exc import SQLAlchemyError
@ -30,6 +32,12 @@ from .base import (
logger,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds - update notification every 30 seconds
HEARTBEAT_INTERVAL_SECONDS = 30
# Maximum tokens for a single digest before splitting
# Most LLMs can handle 128k+ tokens now, but we'll be conservative
MAX_DIGEST_CHARS = 500_000 # ~125k tokens
@ -43,6 +51,7 @@ async def index_github_repos(
start_date: str | None = None, # Ignored - GitHub indexes full repo snapshots
end_date: str | None = None, # Ignored - GitHub indexes full repo snapshots
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index GitHub repositories using gitingest for efficient processing.
@ -62,6 +71,7 @@ async def index_github_repos(
start_date: Ignored - kept for API compatibility
end_date: Ignored - kept for API compatibility
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -168,7 +178,18 @@ async def index_github_repos(
f"Starting gitingest indexing for {len(repo_full_names_to_index)} repositories."
)
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
documents_indexed = 0
for repo_full_name in repo_full_names_to_index:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
if not repo_full_name or not isinstance(repo_full_name, str):
logger.warning(f"Skipping invalid repository entry: {repo_full_name}")
continue

View file

@ -2,10 +2,10 @@
Google Calendar connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import datetime, timedelta
import pytz
from dateutil.parser import isoparse
from google.oauth2.credentials import Credentials
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
@ -30,6 +30,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_google_calendar_events(
session: AsyncSession,
@ -39,6 +45,7 @@ async def index_google_calendar_events(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Google Calendar events.
@ -52,6 +59,7 @@ async def index_google_calendar_events(
end_date: End date for indexing (YYYY-MM-DD format). Can be in the future to index upcoming events.
Defaults to today if not provided.
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -281,7 +289,17 @@ async def index_google_calendar_events(
0 # Track events skipped due to duplicate content_hash
)
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
for event in events:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
event_id = event.get("id")
event_summary = event.get("summary", "No Title")

View file

@ -1,6 +1,8 @@
"""Google Drive indexer using Surfsense file processors."""
import logging
import time
from collections.abc import Awaitable, Callable
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
@ -24,6 +26,12 @@ from app.tasks.connector_indexers.base import (
)
from app.utils.document_converters import generate_unique_identifier_hash
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
logger = logging.getLogger(__name__)
@ -38,6 +46,7 @@ async def index_google_drive_files(
update_last_indexed: bool = True,
max_files: int = 500,
include_subfolders: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Google Drive files for a specific connector.
@ -53,6 +62,7 @@ async def index_google_drive_files(
update_last_indexed: Whether to update last_indexed_at timestamp
max_files: Maximum number of files to index
include_subfolders: Whether to recursively index files in subfolders
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple of (number_of_indexed_files, error_message)
@ -147,6 +157,7 @@ async def index_google_drive_files(
log_entry=log_entry,
max_files=max_files,
include_subfolders=include_subfolders,
on_heartbeat_callback=on_heartbeat_callback,
)
else:
logger.info(f"Using full scan for connector {connector_id}")
@ -163,6 +174,7 @@ async def index_google_drive_files(
log_entry=log_entry,
max_files=max_files,
include_subfolders=include_subfolders,
on_heartbeat_callback=on_heartbeat_callback,
)
documents_indexed, documents_skipped = result
@ -383,6 +395,7 @@ async def _index_full_scan(
log_entry: any,
max_files: int,
include_subfolders: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int]:
"""Perform full scan indexing of a folder."""
await task_logger.log_task_progress(
@ -399,10 +412,20 @@ async def _index_full_scan(
documents_skipped = 0
files_processed = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
# Queue of folders to process: (folder_id, folder_name)
folders_to_process = [(folder_id, folder_name)]
while folders_to_process and files_processed < max_files:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
current_folder_id, current_folder_name = folders_to_process.pop(0)
logger.info(f"Processing folder: {current_folder_name} ({current_folder_id})")
page_token = None
@ -485,6 +508,7 @@ async def _index_with_delta_sync(
log_entry: any,
max_files: int,
include_subfolders: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int]:
"""Perform delta sync indexing using change tracking.
@ -515,7 +539,17 @@ async def _index_with_delta_sync(
documents_skipped = 0
files_processed = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
for change in changes:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
if files_processed >= max_files:
break

View file

@ -2,6 +2,8 @@
Google Gmail connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from google.oauth2.credentials import Credentials
@ -33,6 +35,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_google_gmail_messages(
session: AsyncSession,
@ -43,6 +51,7 @@ async def index_google_gmail_messages(
end_date: str | None = None,
update_last_indexed: bool = True,
max_messages: int = 1000,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str]:
"""
Index Gmail messages for a specific connector.
@ -56,6 +65,7 @@ async def index_google_gmail_messages(
end_date: End date for filtering messages (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
max_messages: Maximum number of messages to fetch (default: 100)
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple of (number_of_indexed_messages, status_message)
@ -212,7 +222,18 @@ async def index_google_gmail_messages(
documents_indexed = 0
skipped_messages = []
documents_skipped = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
for message in messages:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
# Extract message information
message_id = message.get("id", "")

View file

@ -3,6 +3,8 @@ Jira connector indexer.
"""
import contextlib
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError
@ -30,6 +32,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds - update notification every 30 seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_jira_issues(
session: AsyncSession,
@ -39,6 +47,7 @@ async def index_jira_issues(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Jira issues and comments.
@ -51,6 +60,7 @@ async def index_jira_issues(
start_date: Start date for indexing (YYYY-MM-DD format)
end_date: End date for indexing (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -169,7 +179,17 @@ async def index_jira_issues(
skipped_issues = []
documents_skipped = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
for issue in issues:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
issue_id = issue.get("key")
issue_identifier = issue.get("key", "")

View file

@ -2,6 +2,8 @@
Linear connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError
@ -29,6 +31,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds - update notification every 30 seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_linear_issues(
session: AsyncSession,
@ -38,6 +46,7 @@ async def index_linear_issues(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Linear issues and comments.
@ -50,6 +59,7 @@ async def index_linear_issues(
start_date: Start date for indexing (YYYY-MM-DD format)
end_date: End date for indexing (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -188,6 +198,9 @@ async def index_linear_issues(
documents_skipped = 0
skipped_issues = []
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
await task_logger.log_task_progress(
log_entry,
f"Starting to process {len(issues)} Linear issues",
@ -196,6 +209,14 @@ async def index_linear_issues(
# Process each issue
for issue in issues:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
issue_id = issue.get("id", "")
issue_identifier = issue.get("identifier", "")

View file

@ -2,6 +2,8 @@
Luma connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import datetime, timedelta
from sqlalchemy.exc import SQLAlchemyError
@ -28,6 +30,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_luma_events(
session: AsyncSession,
@ -37,6 +45,7 @@ async def index_luma_events(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Luma events.
@ -50,6 +59,7 @@ async def index_luma_events(
end_date: End date for indexing (YYYY-MM-DD format). Can be in the future to index upcoming events.
Defaults to today if not provided.
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -221,7 +231,17 @@ async def index_luma_events(
documents_skipped = 0
skipped_events = []
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
for event in events:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
# Luma event structure fields - events have nested 'event' field
event_data = event.get("event", {})

View file

@ -2,6 +2,7 @@
Notion connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
@ -34,6 +35,13 @@ from .base import (
# Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds) -> None
RetryCallbackType = Callable[[str, int, int, float], Awaitable[None]]
# Type alias for heartbeat callback
# Signature: async callback(indexed_count) -> None
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds - update notification every 30 seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_notion_pages(
session: AsyncSession,
@ -44,6 +52,7 @@ async def index_notion_pages(
end_date: str | None = None,
update_last_indexed: bool = True,
on_retry_callback: RetryCallbackType | None = None,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Notion pages from all accessible pages.
@ -59,6 +68,8 @@ async def index_notion_pages(
on_retry_callback: Optional callback for retry progress notifications.
Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds)
retry_reason is one of: 'rate_limit', 'server_error', 'timeout'
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Called periodically with (indexed_count) to prevent task appearing stuck.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -211,6 +222,9 @@ async def index_notion_pages(
documents_skipped = 0
skipped_pages = []
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
await task_logger.log_task_progress(
log_entry,
f"Starting to process {len(pages)} Notion pages",
@ -219,6 +233,14 @@ async def index_notion_pages(
# Process each page
for page in pages:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
page_id = page.get("page_id")
page_title = page.get("title", f"Untitled page ({page_id})")

View file

@ -7,6 +7,8 @@ This connector is only available in self-hosted mode.
import os
import re
import time
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from pathlib import Path
@ -35,6 +37,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
def parse_frontmatter(content: str) -> tuple[dict | None, str]:
"""
@ -152,6 +160,7 @@ async def index_obsidian_vault(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index notes from a local Obsidian vault.
@ -167,6 +176,7 @@ async def index_obsidian_vault(
start_date: Start date for filtering (YYYY-MM-DD format) - optional
end_date: End date for filtering (YYYY-MM-DD format) - optional
update_last_indexed: Whether to update the last_indexed_at timestamp
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
@ -305,7 +315,17 @@ async def index_obsidian_vault(
indexed_count = 0
skipped_count = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
for file_info in files:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(indexed_count)
last_heartbeat_time = time.time()
try:
file_path = file_info["path"]
relative_path = file_info["relative_path"]
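The time-based gate above fires the callback roughly once per HEARTBEAT_INTERVAL_SECONDS rather than once per item. A self-contained demo of the same pattern with a deliberately shortened interval (timings are illustrative only):

import asyncio
import time

DEMO_INTERVAL_SECONDS = 0.1  # shortened so the demo finishes quickly

async def main() -> None:
    beats: list[int] = []

    async def on_heartbeat(indexed_count: int) -> None:
        beats.append(indexed_count)

    last_heartbeat_time = time.time()
    for indexed_count in range(50):
        # Same gating check the connectors use inside their loops.
        if time.time() - last_heartbeat_time >= DEMO_INTERVAL_SECONDS:
            await on_heartbeat(indexed_count)
            last_heartbeat_time = time.time()
        await asyncio.sleep(0.01)  # stand-in for per-item indexing work

    # Expect a handful of heartbeats for 50 items, not 50.
    print(f"{len(beats)} heartbeats for 50 items: {beats}")

asyncio.run(main())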

View file

@@ -2,6 +2,8 @@
Slack connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from slack_sdk.errors import SlackApiError
@@ -29,6 +31,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds - update notification every 30 seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_slack_messages(
session: AsyncSession,
@@ -38,6 +46,7 @@ async def index_slack_messages(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Slack messages from all accessible channels.
@@ -50,6 +59,8 @@ async def index_slack_messages(
start_date: Start date for indexing (YYYY-MM-DD format)
end_date: End date for indexing (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update the notification during long-running indexing.
Called periodically with (indexed_count) to prevent the task from appearing stuck.
Returns:
Tuple containing (number of documents indexed, error message or None)
@@ -164,6 +175,9 @@ async def index_slack_messages(
documents_skipped = 0
skipped_channels = []
# Heartbeat tracking - update the notification periodically so the task does not appear stuck
last_heartbeat_time = time.time()
await task_logger.log_task_progress(
log_entry,
f"Starting to process {len(channels)} Slack channels",
@@ -172,6 +186,13 @@ async def index_slack_messages(
# Process each channel
for channel_obj in channels:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
channel_id = channel_obj["id"]
channel_name = channel_obj["name"]
is_private = channel_obj["is_private"]
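The same inline interval check now appears in the Notion, Obsidian, Slack, Teams, and webcrawler loops. One possible follow-up, not part of this change, would be to factor it into a small helper so each connector only calls beat() inside its loop; a sketch:

import time
from collections.abc import Awaitable, Callable

class Heartbeat:
    """Invoke an async callback at most once every `interval` seconds."""

    def __init__(
        self,
        callback: Callable[[int], Awaitable[None]] | None,
        interval: float = 30.0,
    ) -> None:
        self._callback = callback
        self._interval = interval
        self._last = time.time()

    async def beat(self, indexed_count: int) -> None:
        # No-op when the caller did not supply a callback.
        if self._callback is None:
            return
        now = time.time()
        if now - self._last >= self._interval:
            await self._callback(indexed_count)
            self._last = now

# Inside a connector loop the repeated check then collapses to one line:
#     heartbeat = Heartbeat(on_heartbeat_callback, HEARTBEAT_INTERVAL_SECONDS)
#     for channel_obj in channels:
#         await heartbeat.beat(documents_indexed)
#         ...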

View file

@@ -2,6 +2,8 @@
Microsoft Teams connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import UTC
from sqlalchemy.exc import SQLAlchemyError
@@ -28,6 +30,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds - update notification every 30 seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_teams_messages(
session: AsyncSession,
@@ -37,6 +45,7 @@ async def index_teams_messages(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index Microsoft Teams messages from all accessible teams and channels.
@@ -49,6 +58,8 @@ async def index_teams_messages(
start_date: Start date for indexing (YYYY-MM-DD format)
end_date: End date for indexing (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update the notification during long-running indexing.
Called periodically with (indexed_count) to prevent the task from appearing stuck.
Returns:
Tuple containing (number of documents indexed, error message or None)
@@ -161,6 +172,9 @@ async def index_teams_messages(
documents_skipped = 0
skipped_channels = []
# Heartbeat tracking - update the notification periodically so the task does not appear stuck
last_heartbeat_time = time.time()
await task_logger.log_task_progress(
log_entry,
f"Starting to process {len(teams)} Teams",
@@ -185,6 +199,14 @@ async def index_teams_messages(
# Process each team
for team in teams:
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
team_id = team.get("id")
team_name = team.get("displayName", "Unknown Team")
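One caveat: the heartbeat is awaited ahead of each item's own error handling (before the try: in the Notion and Obsidian loops above), so an exception raised inside the callback could abort the whole run rather than skip a single item. A defensive callback guards against that; report_progress below is a hypothetical stand-in for the caller's actual progress writer:

import logging

logger = logging.getLogger(__name__)

async def safe_heartbeat(indexed_count: int) -> None:
    try:
        # `report_progress` is hypothetical; substitute the caller's real
        # notification/progress update.
        await report_progress(indexed_count)
    except Exception:
        # Never let a progress update kill the indexing run itself.
        logger.warning("Heartbeat update failed; continuing indexing", exc_info=True)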

View file

@@ -2,6 +2,8 @@
Webcrawler connector indexer.
"""
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError
@@ -29,6 +31,12 @@ from .base import (
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
async def index_crawled_urls(
session: AsyncSession,
@@ -38,6 +46,7 @@ async def index_crawled_urls(
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index web page URLs.
@@ -50,6 +59,7 @@ async def index_crawled_urls(
start_date: Start date for filtering (YYYY-MM-DD format) - optional
end_date: End date for filtering (YYYY-MM-DD format) - optional
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
on_heartbeat_callback: Optional callback to update the notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
@@ -140,7 +150,17 @@ async def index_crawled_urls(
documents_skipped = 0
failed_urls = []
# Heartbeat tracking - update the notification periodically so the task does not appear stuck
last_heartbeat_time = time.time()
for idx, url in enumerate(urls, 1):
# Check if it's time for a heartbeat update
if (
on_heartbeat_callback
and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
):
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = time.time()
try:
logger.info(f"Processing URL {idx}/{len(urls)}: {url}")