Merge pull request #790 from MODSetter/dev

feat: new manage docs ui/ux
2026-07-22 23:31:12 +02:00 · 2026-02-05 23:50:42 -08:00 · 2026-02-05 23:50:42 -08:00 · d97068882a
commit d97068882a
parent ac35f9d674 017c262842
65 changed files with 8215 additions and 4388 deletions
--- a/surfsense_backend/alembic/versions/66_add_notifications_table_and_electric_replication.py
+++ b/surfsense_backend/alembic/versions/66_add_notifications_table_and_electric_replication.py
@@ -17,13 +17,6 @@ from collections.abc import Sequence

 from alembic import context, op

-# Get Electric SQL user credentials from env.py configuration
-_config = context.config
-ELECTRIC_DB_USER = _config.get_main_option("electric_db_user", "electric")
-ELECTRIC_DB_PASSWORD = _config.get_main_option(
-    "electric_db_password", "electric_password"
-)
-
 # revision identifiers, used by Alembic.
 revision: str = "66"
 down_revision: str | None = "65"
@@ -31,8 +24,21 @@ branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None


+def _get_electric_credentials() -> tuple[str, str]:
+    """Get Electric SQL credentials from Alembic config.
+
+    Must be called inside upgrade()/downgrade(), not at module level,
+    because context.config is only available during migration execution.
+    """
+    _config = context.config
+    user = _config.get_main_option("electric_db_user", "electric")
+    password = _config.get_main_option("electric_db_password", "electric_password")
+    return user, password
+
+
 def upgrade() -> None:
    """Upgrade schema - add notifications table and Electric SQL replication."""
+    electric_db_user, electric_db_password = _get_electric_credentials()
    # Create notifications table
    op.execute(
        """
@@ -74,8 +80,8 @@ def upgrade() -> None:
        f"""
        DO $$
        BEGIN
-            IF NOT EXISTS (SELECT FROM pg_user WHERE usename = '{ELECTRIC_DB_USER}') THEN
-                CREATE USER {ELECTRIC_DB_USER} WITH REPLICATION PASSWORD '{ELECTRIC_DB_PASSWORD}';
+            IF NOT EXISTS (SELECT FROM pg_user WHERE usename = '{electric_db_user}') THEN
+                CREATE USER {electric_db_user} WITH REPLICATION PASSWORD '{electric_db_password}';
            END IF;
        END
        $$;
@@ -89,19 +95,19 @@ def upgrade() -> None:
        DECLARE
            db_name TEXT := current_database();
        BEGIN
-            EXECUTE format('GRANT CONNECT ON DATABASE %I TO {ELECTRIC_DB_USER}', db_name);
+            EXECUTE format('GRANT CONNECT ON DATABASE %I TO {electric_db_user}', db_name);
        END
        $$;
        """
    )
-    op.execute(f"GRANT USAGE ON SCHEMA public TO {ELECTRIC_DB_USER};")
-    op.execute(f"GRANT SELECT ON ALL TABLES IN SCHEMA public TO {ELECTRIC_DB_USER};")
-    op.execute(f"GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO {ELECTRIC_DB_USER};")
+    op.execute(f"GRANT USAGE ON SCHEMA public TO {electric_db_user};")
+    op.execute(f"GRANT SELECT ON ALL TABLES IN SCHEMA public TO {electric_db_user};")
+    op.execute(f"GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO {electric_db_user};")
    op.execute(
-        f"ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO {ELECTRIC_DB_USER};"
+        f"ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO {electric_db_user};"
    )
    op.execute(
-        f"ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO {ELECTRIC_DB_USER};"
+        f"ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO {electric_db_user};"
    )

    # Create the publication if not exists
--- a/surfsense_backend/alembic/versions/94_add_access_token_to_image_generations.py
+++ b/surfsense_backend/alembic/versions/94_add_access_token_to_image_generations.py
@@ -10,8 +10,6 @@ SECRET_KEY rotation.

 from collections.abc import Sequence

-import sqlalchemy as sa
-
 from alembic import op

 # revision identifiers, used by Alembic.
@@ -23,17 +21,45 @@ depends_on: str | Sequence[str] | None = None

 def upgrade() -> None:
    # Add access_token column (nullable so existing rows are unaffected)
-    op.add_column(
-        "image_generations",
-        sa.Column("access_token", sa.String(64), nullable=True),
-    )
-    op.create_index(
-        "ix_image_generations_access_token",
-        "image_generations",
-        ["access_token"],
+    # Guard: skip entirely if image_generations table doesn't exist
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF EXISTS (
+                SELECT 1 FROM information_schema.tables
+                WHERE table_name = 'image_generations'
+            ) THEN
+                -- Add column if not exists
+                IF NOT EXISTS (
+                    SELECT 1 FROM information_schema.columns
+                    WHERE table_name = 'image_generations' AND column_name = 'access_token'
+                ) THEN
+                    ALTER TABLE image_generations
+                    ADD COLUMN access_token VARCHAR(64);
+                END IF;
+
+                -- Create index if not exists
+                CREATE INDEX IF NOT EXISTS ix_image_generations_access_token
+                ON image_generations (access_token);
+            END IF;
+        END$$;
+        """
    )


 def downgrade() -> None:
-    op.drop_index("ix_image_generations_access_token", table_name="image_generations")
-    op.drop_column("image_generations", "access_token")
+    op.execute("DROP INDEX IF EXISTS ix_image_generations_access_token")
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF EXISTS (
+                SELECT 1 FROM information_schema.columns
+                WHERE table_name = 'image_generations' AND column_name = 'access_token'
+            ) THEN
+                ALTER TABLE image_generations DROP COLUMN access_token;
+            END IF;
+        END$$;
+        """
+    )
--- a/surfsense_backend/alembic/versions/95_add_document_status_column.py
+++ b/surfsense_backend/alembic/versions/95_add_document_status_column.py
@@ -0,0 +1,77 @@
+"""Add status column to documents table for per-document processing status
+
+Revision ID: 95
+Revises: 94
+Create Date: 2026-02-05
+
+Changes:
+1. Add status column (JSONB) to documents table
+2. Default value is {"state": "ready"} for backward compatibility
+3. Existing documents are set to ready status
+4. Index created for efficient status filtering
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "95"
+down_revision: str | None = "94"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add status column to documents with default ready state."""
+
+    # 1. Add status column with default value for new rows
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF NOT EXISTS (
+                SELECT 1 FROM information_schema.columns
+                WHERE table_name = 'documents' AND column_name = 'status'
+            ) THEN
+                ALTER TABLE documents
+                ADD COLUMN status JSONB NOT NULL DEFAULT '{"state": "ready"}'::jsonb;
+            END IF;
+        END$$;
+        """
+    )
+
+    # 2. Create index on status for efficient filtering by state
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS ix_documents_status
+        ON documents ((status->>'state'));
+        """
+    )
+
+
+def downgrade() -> None:
+    """Remove status column from documents."""
+
+    # Drop index
+    op.execute(
+        """
+        DROP INDEX IF EXISTS ix_documents_status;
+        """
+    )
+
+    # Drop column
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF EXISTS (
+                SELECT 1 FROM information_schema.columns
+                WHERE table_name = 'documents' AND column_name = 'status'
+            ) THEN
+                ALTER TABLE documents
+                DROP COLUMN status;
+            END IF;
+        END$$;
+        """
+    )
--- a/surfsense_backend/app/connectors/composio_gmail_connector.py
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@@ -16,11 +16,15 @@ from sqlalchemy.orm import selectinload

 from app.config import config
 from app.connectors.composio_connector import ComposioConnector
-from app.db import Document, DocumentType
+from app.db import Document, DocumentStatus, DocumentType
 from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
-from app.tasks.connector_indexers.base import calculate_date_range
+from app.tasks.connector_indexers.base import (
+    calculate_date_range,
+    check_duplicate_document_by_hash,
+    safe_set_chunks,
+)
 from app.utils.document_converters import (
    create_document_chunks,
    generate_content_hash,
@@ -206,26 +210,24 @@ class ComposioGmailConnector(ComposioConnector):
 # ============ Indexer Functions ============


-async def _process_gmail_message_batch(
+async def _analyze_gmail_messages_phase1(
    session: AsyncSession,
    messages: list[dict[str, Any]],
    composio_connector: ComposioGmailConnector,
    connector_id: int,
    search_space_id: int,
    user_id: str,
-    total_documents_indexed: int = 0,
-) -> tuple[int, int]:
+) -> tuple[list[dict[str, Any]], int, int]:
    """
-    Process a batch of Gmail messages and index them.
-
-    Args:
-        total_documents_indexed: Running total of documents indexed so far (for batch commits).
+    Phase 1: Analyze all messages, create pending documents.
+    Makes ALL documents visible in the UI immediately with pending status.

    Returns:
-        Tuple of (documents_indexed, documents_skipped)
+        Tuple of (messages_to_process, documents_skipped, duplicate_content_count)
    """
-    documents_indexed = 0
+    messages_to_process = []
    documents_skipped = 0
+    duplicate_content_count = 0

    for message in messages:
        try:
@@ -235,11 +237,7 @@ async def _process_gmail_message_batch(
                documents_skipped += 1
                continue

-            # Composio's GMAIL_FETCH_EMAILS already returns full message content
-            # No need for a separate detail API call
-
            # Extract message info from Composio response
-            # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
            payload = message.get("payload", {})
            headers = payload.get("headers", [])

@@ -262,7 +260,7 @@ async def _process_gmail_message_batch(
                message
            )

-            # Check for empty content (defensive parsing per Composio best practices)
+            # Check for empty content
            if not markdown_content.strip():
                logger.warning(f"Skipping Gmail message with no content: {subject}")
                documents_skipped += 1
@@ -280,102 +278,58 @@ async def _process_gmail_message_batch(
                session, unique_identifier_hash
            )

-            # Get label IDs from Composio response
+            # Get label IDs and thread_id from Composio response
            label_ids = message.get("labelIds", [])
-            # Extract thread_id if available (for consistency with non-Composio implementation)
            thread_id = message.get("threadId", "") or message.get("thread_id", "")

            if existing_document:
                if existing_document.content_hash == content_hash:
+                    # Ensure status is ready (might have been stuck in processing/pending)
+                    if not DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.READY
+                    ):
+                        existing_document.status = DocumentStatus.ready()
                    documents_skipped += 1
                    continue

-                # Update existing
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
+                # Queue existing document for update (will be set to processing in Phase 2)
+                messages_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
                        "message_id": message_id,
                        "thread_id": thread_id,
                        "subject": subject,
                        "sender": sender,
-                        "document_type": "Gmail Message (Composio)",
+                        "date_str": date_str,
+                        "label_ids": label_ids,
                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        markdown_content, user_llm, document_metadata
-                    )
-                else:
-                    summary_content = (
-                        f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
-                    )
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                chunks = await create_document_chunks(markdown_content)
-
-                existing_document.title = f"Gmail: {subject}"
-                existing_document.content = summary_content
-                existing_document.content_hash = content_hash
-                existing_document.embedding = summary_embedding
-                existing_document.document_metadata = {
-                    "message_id": message_id,
-                    "thread_id": thread_id,
-                    "subject": subject,
-                    "sender": sender,
-                    "date": date_str,
-                    "labels": label_ids,
-                    "connector_id": connector_id,
-                    "source": "composio",
-                }
-                existing_document.chunks = chunks
-                existing_document.updated_at = get_current_timestamp()
-
-                documents_indexed += 1
-
-                # Batch commit every 10 documents
-                current_total = total_documents_indexed + documents_indexed
-                if current_total % 10 == 0:
-                    logger.info(
-                        f"Committing batch: {current_total} Gmail messages processed so far"
-                    )
-                    await session.commit()
+                )
                continue

-            # Create new document
-            user_llm = await get_user_long_context_llm(
-                session, user_id, search_space_id
-            )
-
-            if user_llm:
-                document_metadata = {
-                    "message_id": message_id,
-                    "thread_id": thread_id,
-                    "subject": subject,
-                    "sender": sender,
-                    "document_type": "Gmail Message (Composio)",
-                }
-                summary_content, summary_embedding = await generate_document_summary(
-                    markdown_content, user_llm, document_metadata
-                )
-            else:
-                summary_content = (
-                    f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
-                )
-                summary_embedding = config.embedding_model_instance.embed(
-                    summary_content
+            # Document doesn't exist by unique_identifier_hash
+            # Check if a document with the same content_hash exists (from standard connector)
+            with session.no_autoflush:
+                duplicate_by_content = await check_duplicate_document_by_hash(
+                    session, content_hash
                )

-            chunks = await create_document_chunks(markdown_content)
+            if duplicate_by_content:
+                logger.info(
+                    f"Message {subject} already indexed by another connector "
+                    f"(existing document ID: {duplicate_by_content.id}, "
+                    f"type: {duplicate_by_content.document_type}). Skipping."
+                )
+                duplicate_content_count += 1
+                documents_skipped += 1
+                continue

+            # Create new document with PENDING status (visible in UI immediately)
            document = Document(
                search_space_id=search_space_id,
-                title=f"Gmail: {subject}",
+                title=subject,
                document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
                document_metadata={
                    "message_id": message_id,
@@ -388,39 +342,140 @@ async def _process_gmail_message_batch(
                    "toolkit_id": "gmail",
                    "source": "composio",
                },
-                content=summary_content,
-                content_hash=content_hash,
+                content="Pending...",  # Placeholder until processed
+                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                unique_identifier_hash=unique_identifier_hash,
-                embedding=summary_embedding,
-                chunks=chunks,
+                embedding=None,
+                chunks=[],  # Empty at creation - safe for async
+                status=DocumentStatus.pending(),  # Pending until processing starts
                updated_at=get_current_timestamp(),
                created_by_id=user_id,
                connector_id=connector_id,
            )
            session.add(document)
+
+            messages_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "markdown_content": markdown_content,
+                    "content_hash": content_hash,
+                    "message_id": message_id,
+                    "thread_id": thread_id,
+                    "subject": subject,
+                    "sender": sender,
+                    "date_str": date_str,
+                    "label_ids": label_ids,
+                }
+            )
+
+        except Exception as e:
+            logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
+            documents_skipped += 1
+            continue
+
+    return messages_to_process, documents_skipped, duplicate_content_count
+
+
+async def _process_gmail_messages_phase2(
+    session: AsyncSession,
+    messages_to_process: list[dict[str, Any]],
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    on_heartbeat_callback: HeartbeatCallbackType | None = None,
+) -> tuple[int, int]:
+    """
+    Phase 2: Process each document one by one.
+    Each document transitions: pending → processing → ready/failed
+
+    Returns:
+        Tuple of (documents_indexed, documents_failed)
+    """
+    documents_indexed = 0
+    documents_failed = 0
+    last_heartbeat_time = time.time()
+
+    for item in messages_to_process:
+        # Send heartbeat periodically
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        document = item["document"]
+        try:
+            # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+            document.status = DocumentStatus.processing()
+            await session.commit()
+
+            # Heavy processing (LLM, embeddings, chunks)
+            user_llm = await get_user_long_context_llm(
+                session, user_id, search_space_id
+            )
+
+            if user_llm:
+                document_metadata_for_summary = {
+                    "message_id": item["message_id"],
+                    "thread_id": item["thread_id"],
+                    "subject": item["subject"],
+                    "sender": item["sender"],
+                    "document_type": "Gmail Message (Composio)",
+                }
+                summary_content, summary_embedding = await generate_document_summary(
+                    item["markdown_content"], user_llm, document_metadata_for_summary
+                )
+            else:
+                summary_content = f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
+                summary_embedding = config.embedding_model_instance.embed(
+                    summary_content
+                )
+
+            chunks = await create_document_chunks(item["markdown_content"])
+
+            # Update document to READY with actual content
+            document.title = item["subject"]
+            document.content = summary_content
+            document.content_hash = item["content_hash"]
+            document.embedding = summary_embedding
+            document.document_metadata = {
+                "message_id": item["message_id"],
+                "thread_id": item["thread_id"],
+                "subject": item["subject"],
+                "sender": item["sender"],
+                "date": item["date_str"],
+                "labels": item["label_ids"],
+                "connector_id": connector_id,
+                "source": "composio",
+            }
+            safe_set_chunks(document, chunks)
+            document.updated_at = get_current_timestamp()
+            document.status = DocumentStatus.ready()
+
            documents_indexed += 1

-            # Batch commit every 10 documents
-            current_total = total_documents_indexed + documents_indexed
-            if current_total % 10 == 0:
+            # Batch commit every 10 documents (for ready status updates)
+            if documents_indexed % 10 == 0:
                logger.info(
-                    f"Committing batch: {current_total} Gmail messages processed so far"
+                    f"Committing batch: {documents_indexed} Gmail messages processed so far"
                )
                await session.commit()

        except Exception as e:
            logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
-            documents_skipped += 1
-            # Rollback on error to avoid partial state (per Composio best practices)
+            # Mark document as failed with reason (visible in UI)
            try:
-                await session.rollback()
-            except Exception as rollback_error:
+                document.status = DocumentStatus.failed(str(e))
+                document.updated_at = get_current_timestamp()
+            except Exception as status_error:
                logger.error(
-                    f"Error during rollback: {rollback_error!s}", exc_info=True
+                    f"Failed to update document status to failed: {status_error}"
                )
+            documents_failed += 1
            continue

-    return documents_indexed, documents_skipped
+    return documents_indexed, documents_failed


 async def index_composio_gmail(
@@ -437,7 +492,7 @@ async def index_composio_gmail(
    max_items: int = 1000,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, str]:
-    """Index Gmail messages via Composio with pagination and incremental processing."""
+    """Index Gmail messages via Composio with real-time document status updates."""
    try:
        composio_connector = ComposioGmailConnector(session, connector_id)

@@ -448,14 +503,10 @@ async def index_composio_gmail(
            end_date = None

        # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
-        # This ensures user-selected dates are respected (matching non-Composio Gmail connector behavior)
        if start_date is not None and end_date is not None:
-            # User provided both dates - use them directly
            start_date_str = start_date
            end_date_str = end_date
        else:
-            # Calculate date range with defaults (uses last_indexed_at or 365 days back)
-            # This ensures indexing works even when user doesn't specify dates
            start_date_str, end_date_str = calculate_date_range(
                connector, start_date, end_date, default_days_back=365
            )
@@ -473,48 +524,32 @@ async def index_composio_gmail(
            f"(start_date={start_date_str}, end_date={end_date_str})"
        )

-        # Use smaller batch size to avoid 413 payload too large errors
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Fetching Gmail messages via Composio for connector {connector_id}",
+            {"stage": "fetching_messages"},
+        )
+
+        # =======================================================================
+        # FETCH ALL MESSAGES FIRST
+        # =======================================================================
        batch_size = 50
        page_token = None
-        total_documents_indexed = 0
-        total_documents_skipped = 0
-        total_messages_fetched = 0
-        result_size_estimate = None  # Will be set from first API response
+        all_messages = []
+        result_size_estimate = None
        last_heartbeat_time = time.time()

-        while total_messages_fetched < max_items:
-            # Send heartbeat periodically to indicate task is still alive
+        while len(all_messages) < max_items:
+            # Send heartbeat periodically
            if on_heartbeat_callback:
                current_time = time.time()
                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
-                    await on_heartbeat_callback(total_documents_indexed)
+                    await on_heartbeat_callback(len(all_messages))
                    last_heartbeat_time = current_time

-            # Calculate how many messages to fetch in this batch
-            remaining = max_items - total_messages_fetched
+            remaining = max_items - len(all_messages)
            current_batch_size = min(batch_size, remaining)

-            # Use result_size_estimate if available, otherwise fall back to max_items
-            estimated_total = (
-                result_size_estimate if result_size_estimate is not None else max_items
-            )
-            # Cap estimated_total at max_items to avoid showing misleading progress
-            estimated_total = min(estimated_total, max_items)
-
-            await task_logger.log_task_progress(
-                log_entry,
-                f"Fetching Gmail messages batch via Composio for connector {connector_id} "
-                f"({total_messages_fetched}/{estimated_total} fetched, {total_documents_indexed} indexed)",
-                {
-                    "stage": "fetching_messages",
-                    "batch_size": current_batch_size,
-                    "total_fetched": total_messages_fetched,
-                    "total_indexed": total_documents_indexed,
-                    "estimated_total": estimated_total,
-                },
-            )
-
-            # Fetch batch of messages
            (
                messages,
                next_token,
@ -533,97 +568,136 @@ async def index_composio_gmail(
                return 0, f"Failed to fetch Gmail messages: {error}"

            if not messages:
-                # No more messages available
                break

-            # Update result_size_estimate from first response (Gmail provides this estimate)
            if result_size_estimate is None and result_size_estimate_batch is not None:
                result_size_estimate = result_size_estimate_batch
                logger.info(
                    f"Gmail API estimated {result_size_estimate} total messages for query: '{query}'"
                )

-            total_messages_fetched += len(messages)
-            # Recalculate estimated_total after potentially updating result_size_estimate
-            estimated_total = (
-                result_size_estimate if result_size_estimate is not None else max_items
-            )
-            estimated_total = min(estimated_total, max_items)
-
+            all_messages.extend(messages)
            logger.info(
-                f"Fetched batch of {len(messages)} Gmail messages "
-                f"(total: {total_messages_fetched}/{estimated_total})"
+                f"Fetched {len(messages)} messages (total: {len(all_messages)})"
            )

-            # Process batch incrementally
-            batch_indexed, batch_skipped = await _process_gmail_message_batch(
-                session=session,
-                messages=messages,
-                composio_connector=composio_connector,
-                connector_id=connector_id,
-                search_space_id=search_space_id,
-                user_id=user_id,
-                total_documents_indexed=total_documents_indexed,
-            )
-
-            total_documents_indexed += batch_indexed
-            total_documents_skipped += batch_skipped
-
-            logger.info(
-                f"Processed batch: {batch_indexed} indexed, {batch_skipped} skipped "
-                f"(total: {total_documents_indexed} indexed, {total_documents_skipped} skipped)"
-            )
-
-            # Batch commits happen in _process_gmail_message_batch every 10 documents
-            # This ensures progress is saved incrementally, preventing data loss on crashes
-
-            # Check if we should continue
-            if not next_token:
-                # No more pages available
+            if not next_token or len(messages) < current_batch_size:
                break

-            if len(messages) < current_batch_size:
-                # Last page had fewer items than requested, we're done
-                break
-
-            # Continue with next page
            page_token = next_token

-        if total_messages_fetched == 0:
+        if not all_messages:
            success_msg = "No Gmail messages found in the specified date range"
            await task_logger.log_task_success(
                log_entry, success_msg, {"messages_count": 0}
            )
-            # CRITICAL: Update timestamp even when no messages found so Electric SQL syncs and UI shows indexed status
            await update_connector_last_indexed(session, connector, update_last_indexed)
            await session.commit()
-            return 0, None  # Return None (not error) when no items found
+            return (
+                0,
+                None,
+            )  # Return None (not error) when no items found - this is success with 0 items

-        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
-        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        logger.info(f"Found {len(all_messages)} Gmail messages to index via Composio")
+
+        # =======================================================================
+        # PHASE 1: Analyze all messages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Phase 1: Creating pending documents for {len(all_messages)} messages",
+            {"stage": "phase1_pending"},
+        )
+
+        (
+            messages_to_process,
+            documents_skipped,
+            duplicate_content_count,
+        ) = await _analyze_gmail_messages_phase1(
+            session=session,
+            messages=all_messages,
+            composio_connector=composio_connector,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+        )
+
+        # Commit all pending documents - they all appear in UI now
+        new_documents_count = len([m for m in messages_to_process if m["is_new"]])
+        if new_documents_count > 0:
+            logger.info(f"Phase 1: Committing {new_documents_count} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Phase 2: Processing {len(messages_to_process)} documents",
+            {"stage": "phase2_processing"},
+        )
+
+        documents_indexed, documents_failed = await _process_gmail_messages_phase2(
+            session=session,
+            messages_to_process=messages_to_process,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            on_heartbeat_callback=on_heartbeat_callback,
+        )
+
+        # CRITICAL: Always update timestamp so Electric SQL syncs
        await update_connector_last_indexed(session, connector, update_last_indexed)

-        # Final commit to ensure all documents are persisted (safety net)
-        # This matches the pattern used in non-Composio Gmail indexer
-        logger.info(
-            f"Final commit: Total {total_documents_indexed} Gmail messages processed"
-        )
-        await session.commit()
-        logger.info(
-            "Successfully committed all Composio Gmail document changes to database"
-        )
+        # Final commit to ensure all documents are persisted
+        logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed")
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Composio Gmail document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Gmail indexing via Composio for connector {connector_id}",
            {
-                "documents_indexed": total_documents_indexed,
-                "documents_skipped": total_documents_skipped,
-                "messages_fetched": total_messages_fetched,
+                "documents_indexed": documents_indexed,
+                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
            },
        )

-        return total_documents_indexed, None
+        logger.info(
+            f"Composio Gmail indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
+        )
+        return documents_indexed, warning_message

    except Exception as e:
        logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@ -16,13 +16,14 @@ from sqlalchemy.orm import selectinload

 from app.config import config
 from app.connectors.composio_connector import ComposioConnector
-from app.db import Document, DocumentType
+from app.db import Document, DocumentStatus, DocumentType
 from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.connector_indexers.base import (
    calculate_date_range,
    check_duplicate_document_by_hash,
+    safe_set_chunks,
 )
 from app.utils.document_converters import (
    create_document_chunks,
@ -266,18 +267,20 @@ async def index_composio_google_calendar(

        documents_indexed = 0
        documents_skipped = 0
+        documents_failed = 0  # Track events that failed processing
        duplicate_content_count = (
            0  # Track events skipped due to duplicate content_hash
        )
        last_heartbeat_time = time.time()

+        # =======================================================================
+        # PHASE 1: Analyze all events, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        events_to_process = []  # List of dicts with document and event data
+        new_documents_created = False
+
        for event in events:
-            # Send heartbeat periodically to indicate task is still alive
-            if on_heartbeat_callback:
-                current_time = time.time()
-                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
-                    await on_heartbeat_callback(documents_indexed)
-                    last_heartbeat_time = current_time
            try:
                # Handle both standard Google API and potential Composio variations
                event_id = event.get("id", "") or event.get("eventId", "")
@ -315,61 +318,28 @@ async def index_composio_google_calendar(

                if existing_document:
                    if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
                        documents_skipped += 1
                        continue

-                    # Update existing
-                    user_llm = await get_user_long_context_llm(
-                        session, user_id, search_space_id
-                    )
-
-                    if user_llm:
-                        document_metadata = {
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    events_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "markdown_content": markdown_content,
+                            "content_hash": content_hash,
                            "event_id": event_id,
                            "summary": summary,
                            "start_time": start_time,
-                            "document_type": "Google Calendar Event (Composio)",
+                            "end_time": end_time,
+                            "location": location,
                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            markdown_content, user_llm, document_metadata
-                        )
-                    else:
-                        summary_content = f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
-                        if location:
-                            summary_content += f"\nLocation: {location}"
-                        summary_embedding = config.embedding_model_instance.embed(
-                            summary_content
-                        )
-
-                    chunks = await create_document_chunks(markdown_content)
-
-                    existing_document.title = f"Calendar: {summary}"
-                    existing_document.content = summary_content
-                    existing_document.content_hash = content_hash
-                    existing_document.embedding = summary_embedding
-                    existing_document.document_metadata = {
-                        "event_id": event_id,
-                        "summary": summary,
-                        "start_time": start_time,
-                        "end_time": end_time,
-                        "location": location,
-                        "connector_id": connector_id,
-                        "source": "composio",
-                    }
-                    existing_document.chunks = chunks
-                    existing_document.updated_at = get_current_timestamp()
-
-                    documents_indexed += 1
-
-                    # Batch commit every 10 documents
-                    if documents_indexed % 10 == 0:
-                        logger.info(
-                            f"Committing batch: {documents_indexed} Google Calendar events processed so far"
-                        )
-                        await session.commit()
+                    )
                    continue

                # Document doesn't exist by unique_identifier_hash
@ -380,49 +350,19 @@ async def index_composio_google_calendar(
                    )

                if duplicate_by_content:
-                    # A document with the same content already exists (likely from standard connector)
                    logger.info(
                        f"Event {summary} already indexed by another connector "
                        f"(existing document ID: {duplicate_by_content.id}, "
-                        f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content."
+                        f"type: {duplicate_by_content.document_type}). Skipping."
                    )
                    duplicate_content_count += 1
                    documents_skipped += 1
                    continue

-                # Create new document
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "event_id": event_id,
-                        "summary": summary,
-                        "start_time": start_time,
-                        "document_type": "Google Calendar Event (Composio)",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        markdown_content, user_llm, document_metadata
-                    )
-                else:
-                    summary_content = (
-                        f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
-                    )
-                    if location:
-                        summary_content += f"\nLocation: {location}"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                chunks = await create_document_chunks(markdown_content)
-
+                # Create new document with PENDING status (visible in UI immediately)
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Calendar: {summary}",
+                    title=summary,
                    document_type=DocumentType(
                        TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"]
                    ),
@ -436,19 +376,116 @@ async def index_composio_google_calendar(
                        "toolkit_id": "googlecalendar",
                        "source": "composio",
                    },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                    updated_at=get_current_timestamp(),
                    created_by_id=user_id,
                    connector_id=connector_id,
                )
                session.add(document)
+                new_documents_created = True
+
+                events_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "event_id": event_id,
+                        "summary": summary,
+                        "start_time": start_time,
+                        "end_time": end_time,
+                        "location": location,
+                    }
+                )
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(events_to_process)} documents")
+
+        for item in events_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "event_id": item["event_id"],
+                        "summary": item["summary"],
+                        "start_time": item["start_time"],
+                        "document_type": "Google Calendar Event (Composio)",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["markdown_content"],
+                        user_llm,
+                        document_metadata_for_summary,
+                    )
+                else:
+                    summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}"
+                    if item["location"]:
+                        summary_content += f"\nLocation: {item['location']}"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item["markdown_content"])
+
+                # Update document to READY with actual content
+                document.title = item["summary"]
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "event_id": item["event_id"],
+                    "summary": item["summary"],
+                    "start_time": item["start_time"],
+                    "end_time": item["end_time"],
+                    "location": item["location"],
+                    "connector_id": connector_id,
+                    "source": "composio",
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
                documents_indexed += 1

-                # Batch commit every 10 documents
+                # Batch commit every 10 documents (for ready status updates)
                if documents_indexed % 10 == 0:
                    logger.info(
                        f"Committing batch: {documents_indexed} Google Calendar events processed so far"
@ -457,7 +494,15 @@ async def index_composio_google_calendar(

            except Exception as e:
                logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
-                documents_skipped += 1
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
                continue

        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
@ -490,10 +535,13 @@ async def index_composio_google_calendar(
            else:
                raise

-        # Build warning message if duplicates were found
-        warning_message = None
+        # Build warning message if there were issues
+        warning_parts = []
        if duplicate_content_count > 0:
-            warning_message = f"{duplicate_content_count} skipped (duplicate)"
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        await task_logger.log_task_success(
            log_entry,
@ -501,13 +549,15 @@ async def index_composio_google_calendar(
            {
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                "duplicate_content_count": duplicate_content_count,
            },
        )

        logger.info(
-            f"Composio Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
-            f"({duplicate_content_count} due to duplicate content from other connectors)"
+            f"Composio Google Calendar indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
        )
        return documents_indexed, warning_message

--- a/surfsense_backend/app/connectors/composio_google_drive_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py
@ -21,10 +21,14 @@ from sqlalchemy.orm.attributes import flag_modified

 from app.config import config
 from app.connectors.composio_connector import ComposioConnector
-from app.db import Document, DocumentType, Log
+from app.db import Document, DocumentStatus, DocumentType, Log
 from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
+from app.tasks.connector_indexers.base import (
+    check_duplicate_document_by_hash,
+    safe_set_chunks,
+)
 from app.utils.document_converters import (
    create_document_chunks,
    generate_content_hash,
@ -537,22 +541,6 @@ async def check_document_by_unique_identifier(
    return existing_doc_result.scalars().first()


-async def check_document_by_content_hash(
-    session: AsyncSession, content_hash: str
-) -> Document | None:
-    """Check if a document with the given content hash already exists.
-
-    This is used to prevent duplicate content from being indexed, regardless
-    of which connector originally indexed it.
-    """
-    from sqlalchemy.future import select
-
-    existing_doc_result = await session.execute(
-        select(Document).where(Document.content_hash == content_hash)
-    )
-    return existing_doc_result.scalars().first()
-
-
 async def check_document_by_google_drive_file_id(
    session: AsyncSession, file_id: str, search_space_id: int
 ) -> Document | None:
@ -843,14 +831,16 @@ async def _index_composio_drive_delta_sync(
    log_entry,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, int, list[str]]:
-    """Index Google Drive files using delta sync (only changed files).
+    """Index Google Drive files using delta sync with real-time document status updates.

    Uses GOOGLEDRIVE_LIST_CHANGES to fetch only files that changed since last sync.
    Handles: new files, modified files, and deleted files.
    """
    documents_indexed = 0
    documents_skipped = 0
+    documents_failed = 0
    processing_errors = []
+    duplicate_content_count = 0
    last_heartbeat_time = time.time()

    # Fetch all changes with pagination
@ -881,14 +871,13 @@ async def _index_composio_drive_delta_sync(

    logger.info(f"Processing {len(all_changes)} changes from delta sync")

-    for change in all_changes[:max_items]:
-        # Send heartbeat periodically to indicate task is still alive
-        if on_heartbeat_callback:
-            current_time = time.time()
-            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = current_time
+    # =======================================================================
+    # PHASE 1: Analyze all changes, handle deletions, create pending documents
+    # =======================================================================
+    files_to_process = []
+    new_documents_created = False

+    for change in all_changes[:max_items]:
        try:
            # Handle removed files
            is_removed = change.get("removed", False)
@ -899,9 +888,8 @@ async def _index_composio_drive_delta_sync(
                documents_skipped += 1
                continue

-            # Check if file was trashed or removed
+            # Check if file was trashed or removed - handle deletions immediately
            if is_removed or file_info.get("trashed", False):
-                # Remove document from database
                document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
                unique_identifier_hash = generate_unique_identifier_hash(
                    document_type, f"drive_{file_id}", search_space_id
@ -923,37 +911,233 @@ async def _index_composio_drive_delta_sync(
            if mime_type == "application/vnd.google-apps.folder":
                continue

-            # Process the file
-            indexed, skipped, errors = await _process_single_drive_file(
-                session=session,
-                composio_connector=composio_connector,
-                file_id=file_id,
-                file_name=file_name,
-                mime_type=mime_type,
-                connector_id=connector_id,
-                search_space_id=search_space_id,
-                user_id=user_id,
-                task_logger=task_logger,
-                log_entry=log_entry,
+            # Check for existing document by file ID (from any connector)
+            existing_by_file_id = await check_document_by_google_drive_file_id(
+                session, file_id, search_space_id
            )

-            documents_indexed += indexed
-            documents_skipped += skipped
-            processing_errors.extend(errors)
+            # Generate unique identifier hash
+            document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
+            unique_identifier_hash = generate_unique_identifier_hash(
+                document_type, f"drive_{file_id}", search_space_id
+            )
+
+            # Check if document exists by unique identifier
+            existing_document = await check_document_by_unique_identifier(
+                session, unique_identifier_hash
+            )
+
+            if existing_by_file_id and not existing_document:
+                # File already indexed by different connector - skip
+                logger.info(
+                    f"Skipping file {file_name} (file_id={file_id}): already indexed "
+                    f"by {existing_by_file_id.document_type.value}"
+                )
+                documents_skipped += 1
+                continue
+
+            if existing_document:
+                # Queue existing document for update
+                files_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "file_id": file_id,
+                        "file_name": file_name,
+                        "mime_type": mime_type,
+                    }
+                )
+                continue
+
+            # Create new document with PENDING status
+            document = Document(
+                search_space_id=search_space_id,
+                title=file_name,
+                document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
+                document_metadata={
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "FILE_NAME": file_name,
+                    "mime_type": mime_type,
+                    "connector_id": connector_id,
+                    "toolkit_id": "googledrive",
+                    "source": "composio",
+                },
+                content="Pending...",
+                content_hash=unique_identifier_hash,
+                unique_identifier_hash=unique_identifier_hash,
+                embedding=None,
+                chunks=[],
+                status=DocumentStatus.pending(),
+                updated_at=get_current_timestamp(),
+                created_by_id=user_id,
+                connector_id=connector_id,
+            )
+            session.add(document)
+            new_documents_created = True
+
+            files_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "mime_type": mime_type,
+                }
+            )
+
+        except Exception as e:
+            logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True)
+            documents_skipped += 1
+            continue
+
+    # Commit all pending documents - they all appear in UI now
+    if new_documents_created:
+        logger.info(
+            f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
+        )
+        await session.commit()
+
+    # =======================================================================
+    # PHASE 2: Process each document one by one
+    # =======================================================================
+    logger.info(f"Phase 2: Processing {len(files_to_process)} documents")
+
+    for item in files_to_process:
+        # Send heartbeat periodically
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        document = item["document"]
+        try:
+            # Set to PROCESSING and commit
+            document.status = DocumentStatus.processing()
+            await session.commit()
+
+            # Get file content
+            content, content_error = await composio_connector.get_drive_file_content(
+                item["file_id"], original_mime_type=item["mime_type"]
+            )
+
+            if content_error or not content:
+                logger.warning(
+                    f"Could not get content for file {item['file_name']}: {content_error}"
+                )
+                markdown_content = f"# {item['file_name']}\n\n"
+                markdown_content += f"**File ID:** {item['file_id']}\n"
+                markdown_content += f"**Type:** {item['mime_type']}\n"
+            elif isinstance(content, dict):
+                error_msg = f"Unexpected dict content format for file {item['file_name']}: {list(content.keys())}"
+                logger.error(error_msg)
+                processing_errors.append(error_msg)
+                markdown_content = f"# {item['file_name']}\n\n"
+                markdown_content += f"**File ID:** {item['file_id']}\n"
+                markdown_content += f"**Type:** {item['mime_type']}\n"
+            else:
+                markdown_content = await _process_file_content(
+                    content=content,
+                    file_name=item["file_name"],
+                    file_id=item["file_id"],
+                    mime_type=item["mime_type"],
+                    search_space_id=search_space_id,
+                    user_id=user_id,
+                    session=session,
+                    task_logger=task_logger,
+                    log_entry=log_entry,
+                    processing_errors=processing_errors,
+                )
+
+            content_hash = generate_content_hash(markdown_content, search_space_id)
+
+            # For existing documents, check if content changed
+            if not item["is_new"] and document.content_hash == content_hash:
+                if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
+                    document.status = DocumentStatus.ready()
+                documents_skipped += 1
+                continue
+
+            # Check for duplicate content hash (for new documents)
+            if item["is_new"]:
+                with session.no_autoflush:
+                    duplicate_by_content = await check_duplicate_document_by_hash(
+                        session, content_hash
+                    )
+                if duplicate_by_content:
+                    logger.info(
+                        f"File {item['file_name']} already indexed by another connector. Skipping."
+                    )
+                    await session.delete(document)
+                    duplicate_content_count += 1
+                    documents_skipped += 1
+                    continue
+
+            # Heavy processing (LLM, embeddings, chunks)
+            user_llm = await get_user_long_context_llm(
+                session, user_id, search_space_id
+            )
+
+            if user_llm:
+                document_metadata_for_summary = {
+                    "file_id": item["file_id"],
+                    "file_name": item["file_name"],
+                    "mime_type": item["mime_type"],
+                    "document_type": "Google Drive File (Composio)",
+                }
+                summary_content, summary_embedding = await generate_document_summary(
+                    markdown_content, user_llm, document_metadata_for_summary
+                )
+            else:
+                summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
+                summary_embedding = config.embedding_model_instance.embed(
+                    summary_content
+                )
+
+            chunks = await create_document_chunks(markdown_content)
+
+            # Update document to READY
+            document.title = item["file_name"]
+            document.content = summary_content
+            document.content_hash = content_hash
+            document.embedding = summary_embedding
+            document.document_metadata = {
+                "file_id": item["file_id"],
+                "file_name": item["file_name"],
+                "FILE_NAME": item["file_name"],
+                "mime_type": item["mime_type"],
+                "connector_id": connector_id,
+                "source": "composio",
+            }
+            safe_set_chunks(document, chunks)
+            document.updated_at = get_current_timestamp()
+            document.status = DocumentStatus.ready()
+
+            documents_indexed += 1

            # Batch commit every 10 documents
-            if documents_indexed > 0 and documents_indexed % 10 == 0:
+            if documents_indexed % 10 == 0:
                await session.commit()
                logger.info(f"Committed batch: {documents_indexed} changes processed")

        except Exception as e:
-            error_msg = f"Error processing change for file {file_id}: {e!s}"
+            error_msg = f"Error processing change for file {item['file_id']}: {e!s}"
            logger.error(error_msg, exc_info=True)
            processing_errors.append(error_msg)
-            documents_skipped += 1
+            try:
+                document.status = DocumentStatus.failed(str(e))
+                document.updated_at = get_current_timestamp()
+            except Exception as status_error:
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
+            documents_failed += 1
+            continue

    logger.info(
-        f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped"
+        f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped, "
+        f"{documents_failed} failed ({duplicate_content_count} duplicate content)"
    )
    return documents_indexed, documents_skipped, processing_errors

@ -973,10 +1157,12 @@ async def _index_composio_drive_full_scan(
    log_entry,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, int, list[str]]:
-    """Index Google Drive files using full scan (first sync or when no delta token)."""
+    """Index Google Drive files using full scan with real-time document status updates."""
    documents_indexed = 0
    documents_skipped = 0
+    documents_failed = 0
    processing_errors = []
+    duplicate_content_count = 0
    last_heartbeat_time = time.time()

    all_files = []
@ -1108,14 +1294,14 @@ async def _index_composio_drive_full_scan(
        f"Found {len(all_files)} Google Drive files to index via Composio (full scan)"
    )

-    for file_info in all_files:
-        # Send heartbeat periodically to indicate task is still alive
-        if on_heartbeat_callback:
-            current_time = time.time()
-            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = current_time
+    # =======================================================================
+    # PHASE 1: Analyze all files, create pending documents
+    # This makes ALL documents visible in the UI immediately with pending status
+    # =======================================================================
+    files_to_process = []  # List of dicts with document and file data
+    new_documents_created = False

+    for file_info in all_files:
        try:
            # Handle both standard Google API and potential Composio variations
            file_id = file_info.get("id", "") or file_info.get("fileId", "")
@ -1132,227 +1318,242 @@ async def _index_composio_drive_full_scan(
            if mime_type == "application/vnd.google-apps.folder":
                continue

-            # Process the file
-            indexed, skipped, errors = await _process_single_drive_file(
-                session=session,
-                composio_connector=composio_connector,
-                file_id=file_id,
-                file_name=file_name,
-                mime_type=mime_type,
-                connector_id=connector_id,
-                search_space_id=search_space_id,
-                user_id=user_id,
-                task_logger=task_logger,
-                log_entry=log_entry,
+            # ========== EARLY DUPLICATE CHECK BY FILE ID ==========
+            existing_by_file_id = await check_document_by_google_drive_file_id(
+                session, file_id, search_space_id
+            )
+            if existing_by_file_id:
+                logger.info(
+                    f"Skipping file {file_name} (file_id={file_id}): already indexed "
+                    f"by {existing_by_file_id.document_type.value}"
+                )
+                documents_skipped += 1
+                continue
+
+            # Generate unique identifier hash
+            document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
+            unique_identifier_hash = generate_unique_identifier_hash(
+                document_type, f"drive_{file_id}", search_space_id
            )

-            documents_indexed += indexed
-            documents_skipped += skipped
-            processing_errors.extend(errors)
+            # Check if document exists by unique identifier
+            existing_document = await check_document_by_unique_identifier(
+                session, unique_identifier_hash
+            )
+
+            if existing_document:
+                # Queue existing document for update (will be set to processing in Phase 2)
+                files_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "file_id": file_id,
+                        "file_name": file_name,
+                        "mime_type": mime_type,
+                    }
+                )
+                continue
+
+            # Create new document with PENDING status (visible in UI immediately)
+            document = Document(
+                search_space_id=search_space_id,
+                title=file_name,
+                document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
+                document_metadata={
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "FILE_NAME": file_name,
+                    "mime_type": mime_type,
+                    "connector_id": connector_id,
+                    "toolkit_id": "googledrive",
+                    "source": "composio",
+                },
+                content="Pending...",  # Placeholder until processed
+                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                unique_identifier_hash=unique_identifier_hash,
+                embedding=None,
+                chunks=[],  # Empty at creation - safe for async
+                status=DocumentStatus.pending(),  # Pending until processing starts
+                updated_at=get_current_timestamp(),
+                created_by_id=user_id,
+                connector_id=connector_id,
+            )
+            session.add(document)
+            new_documents_created = True
+
+            files_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "mime_type": mime_type,
+                }
+            )
+
+        except Exception as e:
+            logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True)
+            documents_skipped += 1
+            continue
+
+    # Commit all pending documents - they all appear in UI now
+    if new_documents_created:
+        logger.info(
+            f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
+        )
+        await session.commit()
+
+    # =======================================================================
+    # PHASE 2: Process each document one by one
+    # Each document transitions: pending → processing → ready/failed
+    # =======================================================================
+    logger.info(f"Phase 2: Processing {len(files_to_process)} documents")
+
+    for item in files_to_process:
+        # Send heartbeat periodically
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        document = item["document"]
+        try:
+            # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+            document.status = DocumentStatus.processing()
+            await session.commit()
+
+            # Get file content (pass mime_type for Google Workspace export handling)
+            content, content_error = await composio_connector.get_drive_file_content(
+                item["file_id"], original_mime_type=item["mime_type"]
+            )
+
+            if content_error or not content:
+                logger.warning(
+                    f"Could not get content for file {item['file_name']}: {content_error}"
+                )
+                markdown_content = f"# {item['file_name']}\n\n"
+                markdown_content += f"**File ID:** {item['file_id']}\n"
+                markdown_content += f"**Type:** {item['mime_type']}\n"
+            elif isinstance(content, dict):
+                error_msg = f"Unexpected dict content format for file {item['file_name']}: {list(content.keys())}"
+                logger.error(error_msg)
+                processing_errors.append(error_msg)
+                markdown_content = f"# {item['file_name']}\n\n"
+                markdown_content += f"**File ID:** {item['file_id']}\n"
+                markdown_content += f"**Type:** {item['mime_type']}\n"
+            else:
+                # Process content based on file type
+                markdown_content = await _process_file_content(
+                    content=content,
+                    file_name=item["file_name"],
+                    file_id=item["file_id"],
+                    mime_type=item["mime_type"],
+                    search_space_id=search_space_id,
+                    user_id=user_id,
+                    session=session,
+                    task_logger=task_logger,
+                    log_entry=log_entry,
+                    processing_errors=processing_errors,
+                )
+
+            content_hash = generate_content_hash(markdown_content, search_space_id)
+
+            # For existing documents, check if content changed
+            if not item["is_new"] and document.content_hash == content_hash:
+                # Ensure status is ready
+                if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
+                    document.status = DocumentStatus.ready()
+                documents_skipped += 1
+                continue
+
+            # Check for duplicate content hash (for new documents)
+            if item["is_new"]:
+                with session.no_autoflush:
+                    duplicate_by_content = await check_duplicate_document_by_hash(
+                        session, content_hash
+                    )
+                if duplicate_by_content:
+                    logger.info(
+                        f"File {item['file_name']} already indexed by another connector. Skipping."
+                    )
+                    # Remove the pending document we created
+                    await session.delete(document)
+                    duplicate_content_count += 1
+                    documents_skipped += 1
+                    continue
+
+            # Heavy processing (LLM, embeddings, chunks)
+            user_llm = await get_user_long_context_llm(
+                session, user_id, search_space_id
+            )
+
+            if user_llm:
+                document_metadata_for_summary = {
+                    "file_id": item["file_id"],
+                    "file_name": item["file_name"],
+                    "mime_type": item["mime_type"],
+                    "document_type": "Google Drive File (Composio)",
+                }
+                summary_content, summary_embedding = await generate_document_summary(
+                    markdown_content, user_llm, document_metadata_for_summary
+                )
+            else:
+                summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
+                summary_embedding = config.embedding_model_instance.embed(
+                    summary_content
+                )
+
+            chunks = await create_document_chunks(markdown_content)
+
+            # Update document to READY with actual content
+            document.title = item["file_name"]
+            document.content = summary_content
+            document.content_hash = content_hash
+            document.embedding = summary_embedding
+            document.document_metadata = {
+                "file_id": item["file_id"],
+                "file_name": item["file_name"],
+                "FILE_NAME": item["file_name"],
+                "mime_type": item["mime_type"],
+                "connector_id": connector_id,
+                "source": "composio",
+            }
+            safe_set_chunks(document, chunks)
+            document.updated_at = get_current_timestamp()
+            document.status = DocumentStatus.ready()
+
+            documents_indexed += 1

            # Batch commit every 10 documents
-            if documents_indexed > 0 and documents_indexed % 10 == 0:
+            if documents_indexed % 10 == 0:
                logger.info(
                    f"Committing batch: {documents_indexed} Google Drive files processed so far"
                )
                await session.commit()

        except Exception as e:
-            error_msg = f"Error processing Drive file {file_name or 'unknown'}: {e!s}"
+            error_msg = f"Error processing Drive file {item['file_name']}: {e!s}"
            logger.error(error_msg, exc_info=True)
            processing_errors.append(error_msg)
-            documents_skipped += 1
+            # Mark document as failed with reason (visible in UI)
+            try:
+                document.status = DocumentStatus.failed(str(e))
+                document.updated_at = get_current_timestamp()
+            except Exception as status_error:
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
+            documents_failed += 1
+            continue

    logger.info(
-        f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped"
+        f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, "
+        f"{documents_failed} failed ({duplicate_content_count} duplicate content)"
    )
    return documents_indexed, documents_skipped, processing_errors


-async def _process_single_drive_file(
-    session: AsyncSession,
-    composio_connector: ComposioGoogleDriveConnector,
-    file_id: str,
-    file_name: str,
-    mime_type: str,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    task_logger: TaskLoggingService,
-    log_entry,
-) -> tuple[int, int, list[str]]:
-    """Process a single Google Drive file for indexing.
-
-    Returns:
-        Tuple of (documents_indexed, documents_skipped, processing_errors)
-    """
-    processing_errors = []
-
-    # ========== EARLY DUPLICATE CHECK BY FILE ID ==========
-    # Check if this Google Drive file was already indexed by ANY connector
-    # This happens BEFORE download/ETL to save expensive API calls
-    existing_by_file_id = await check_document_by_google_drive_file_id(
-        session, file_id, search_space_id
-    )
-    if existing_by_file_id:
-        logger.info(
-            f"Skipping file {file_name} (file_id={file_id}): already indexed "
-            f"by {existing_by_file_id.document_type.value} as '{existing_by_file_id.title}' "
-            f"(saved download & ETL cost)"
-        )
-        return 0, 1, processing_errors  # Skip - NO download, NO ETL!
-    # ======================================================
-
-    # Generate unique identifier hash
-    document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
-    unique_identifier_hash = generate_unique_identifier_hash(
-        document_type, f"drive_{file_id}", search_space_id
-    )
-
-    # Check if document exists by unique identifier (same connector, same file)
-    existing_document = await check_document_by_unique_identifier(
-        session, unique_identifier_hash
-    )
-
-    # Get file content (pass mime_type for Google Workspace export handling)
-    content, content_error = await composio_connector.get_drive_file_content(
-        file_id, original_mime_type=mime_type
-    )
-
-    if content_error or not content:
-        logger.warning(f"Could not get content for file {file_name}: {content_error}")
-        # Use metadata as content fallback
-        markdown_content = f"# {file_name}\n\n"
-        markdown_content += f"**File ID:** {file_id}\n"
-        markdown_content += f"**Type:** {mime_type}\n"
-    elif isinstance(content, dict):
-        # Safety check: if content is still a dict, log error and use fallback
-        error_msg = f"Unexpected dict content format for file {file_name}: {list(content.keys())}"
-        logger.error(error_msg)
-        processing_errors.append(error_msg)
-        markdown_content = f"# {file_name}\n\n"
-        markdown_content += f"**File ID:** {file_id}\n"
-        markdown_content += f"**Type:** {mime_type}\n"
-    else:
-        # Process content based on file type
-        markdown_content = await _process_file_content(
-            content=content,
-            file_name=file_name,
-            file_id=file_id,
-            mime_type=mime_type,
-            search_space_id=search_space_id,
-            user_id=user_id,
-            session=session,
-            task_logger=task_logger,
-            log_entry=log_entry,
-            processing_errors=processing_errors,
-        )
-
-    content_hash = generate_content_hash(markdown_content, search_space_id)
-
-    if existing_document:
-        if existing_document.content_hash == content_hash:
-            return 0, 1, processing_errors  # Skipped - unchanged
-
-        # Update existing document
-        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-
-        if user_llm:
-            document_metadata = {
-                "file_id": file_id,
-                "file_name": file_name,
-                "mime_type": mime_type,
-                "document_type": "Google Drive File (Composio)",
-            }
-            (
-                summary_content,
-                summary_embedding,
-            ) = await generate_document_summary(
-                markdown_content, user_llm, document_metadata
-            )
-        else:
-            summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
-            summary_embedding = config.embedding_model_instance.embed(summary_content)
-
-        chunks = await create_document_chunks(markdown_content)
-
-        existing_document.title = f"Drive: {file_name}"
-        existing_document.content = summary_content
-        existing_document.content_hash = content_hash
-        existing_document.embedding = summary_embedding
-        existing_document.document_metadata = {
-            "file_id": file_id,
-            "file_name": file_name,
-            "FILE_NAME": file_name,  # For compatibility
-            "mime_type": mime_type,
-            "connector_id": connector_id,
-            "source": "composio",
-        }
-        existing_document.chunks = chunks
-        existing_document.updated_at = get_current_timestamp()
-
-        return 1, 0, processing_errors  # Indexed - updated
-
-    # Check if content_hash already exists (from any connector)
-    # This prevents duplicate content and avoids IntegrityError on unique constraint
-    existing_by_content_hash = await check_document_by_content_hash(
-        session, content_hash
-    )
-    if existing_by_content_hash:
-        logger.info(
-            f"Skipping file {file_name} (file_id={file_id}): identical content "
-            f"already indexed as '{existing_by_content_hash.title}'"
-        )
-        return 0, 1, processing_errors  # Skipped - duplicate content
-
-    # Create new document
-    user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-
-    if user_llm:
-        document_metadata = {
-            "file_id": file_id,
-            "file_name": file_name,
-            "mime_type": mime_type,
-            "document_type": "Google Drive File (Composio)",
-        }
-        (
-            summary_content,
-            summary_embedding,
-        ) = await generate_document_summary(
-            markdown_content, user_llm, document_metadata
-        )
-    else:
-        summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
-        summary_embedding = config.embedding_model_instance.embed(summary_content)
-
-    chunks = await create_document_chunks(markdown_content)
-
-    document = Document(
-        search_space_id=search_space_id,
-        title=f"Drive: {file_name}",
-        document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
-        document_metadata={
-            "file_id": file_id,
-            "file_name": file_name,
-            "FILE_NAME": file_name,  # For compatibility
-            "mime_type": mime_type,
-            "toolkit_id": "googledrive",
-            "source": "composio",
-        },
-        content=summary_content,
-        content_hash=content_hash,
-        unique_identifier_hash=unique_identifier_hash,
-        embedding=summary_embedding,
-        chunks=chunks,
-        updated_at=get_current_timestamp(),
-        created_by_id=user_id,
-        connector_id=connector_id,
-    )
-    session.add(document)
-
-    return 1, 0, processing_errors  # Indexed - new
-
-
 async def _fetch_folder_files_recursively(
    composio_connector: ComposioGoogleDriveConnector,
    folder_id: str,
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -100,6 +100,83 @@ class PodcastStatus(str, Enum):
    FAILED = "failed"


+class DocumentStatus:
+    """
+    Helper class for document processing status (stored as JSONB).
+
+    Status values:
+    - {"state": "ready"} - Document is fully processed and searchable
+    - {"state": "pending"} - Document is queued, waiting to be processed
+    - {"state": "processing"} - Document is currently being processed (only 1 at a time)
+    - {"state": "failed", "reason": "..."} - Processing failed with reason
+
+    Usage:
+        document.status = DocumentStatus.pending()
+        document.status = DocumentStatus.processing()
+        document.status = DocumentStatus.ready()
+        document.status = DocumentStatus.failed("LLM rate limit exceeded")
+    """
+
+    # State constants
+    READY = "ready"
+    PENDING = "pending"
+    PROCESSING = "processing"
+    FAILED = "failed"
+
+    @staticmethod
+    def ready() -> dict:
+        """Return status dict for a ready/searchable document."""
+        return {"state": DocumentStatus.READY}
+
+    @staticmethod
+    def pending() -> dict:
+        """Return status dict for a document waiting to be processed."""
+        return {"state": DocumentStatus.PENDING}
+
+    @staticmethod
+    def processing() -> dict:
+        """Return status dict for a document being processed."""
+        return {"state": DocumentStatus.PROCESSING}
+
+    @staticmethod
+    def failed(reason: str, **extra_details) -> dict:
+        """
+        Return status dict for a failed document.
+
+        Args:
+            reason: Human-readable failure reason
+            **extra_details: Optional additional details (duplicate_of, error_code, etc.)
+        """
+        status = {
+            "state": DocumentStatus.FAILED,
+            "reason": reason[:500],
+        }  # Truncate long reasons
+        if extra_details:
+            status.update(extra_details)
+        return status
+
+    @staticmethod
+    def get_state(status: dict | None) -> str | None:
+        """Extract state from status dict, returns None if invalid."""
+        if status is None:
+            return None
+        return status.get("state") if isinstance(status, dict) else None
+
+    @staticmethod
+    def is_state(status: dict | None, state: str) -> bool:
+        """Check if status matches a given state."""
+        return DocumentStatus.get_state(status) == state
+
+    @staticmethod
+    def get_failure_reason(status: dict | None) -> str | None:
+        """Extract failure reason from status dict."""
+        if status is None or not isinstance(status, dict):
+            return None
+        if status.get("state") == DocumentStatus.FAILED:
+            return status.get("reason")
+        return None
+
+
 class LiteLLMProvider(str, Enum):
    """
    Enum for LLM providers supported by LiteLLM.
@ -813,6 +890,17 @@ class Document(BaseModel, TimestampMixin):
        index=True,
    )

+    # Processing status for real-time visibility (JSONB)
+    # Format: {"state": "ready"}, {"state": "pending"}, {"state": "processing"} or {"state": "failed", "reason": "..."}
+    # Default to {"state": "ready"} for backward compatibility with existing documents
+    status = Column(
+        JSONB,
+        nullable=False,
+        default=DocumentStatus.ready,
+        server_default=text('\'{"state": "ready"}\'::jsonb'),
+        index=True,
+    )
+
    # Relationships
    search_space = relationship("SearchSpace", back_populates="documents")
    created_by = relationship("User", back_populates="documents")
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@ -19,6 +19,7 @@ from app.db import (
 from app.schemas import (
    DocumentRead,
    DocumentsCreate,
+    DocumentStatusSchema,
    DocumentTitleRead,
    DocumentTitleSearchResponse,
    DocumentUpdate,
@ -112,9 +113,23 @@ async def create_documents_file_upload(
    user: User = Depends(current_active_user),
 ):
    """
-    Upload files as documents.
+    Upload files as documents with real-time status tracking.
+
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Create all documents with 'pending' status (visible in UI immediately via ElectricSQL)
+    - Phase 2: Celery processes each file: pending → processing → ready/failed
+
    Requires DOCUMENTS_CREATE permission.
    """
+    from datetime import datetime
+
+    from app.db import DocumentStatus
+    from app.tasks.document_processors.base import (
+        check_document_by_unique_identifier,
+        get_current_timestamp,
+    )
+    from app.utils.document_converters import generate_unique_identifier_hash
+
    try:
        # Check permission
        await check_permission(
@ -128,38 +143,105 @@ async def create_documents_file_upload(
        if not files:
            raise HTTPException(status_code=400, detail="No files provided")

+        created_documents: list[Document] = []
+        files_to_process: list[
+            tuple[Document, str, str]
+        ] = []  # (document, temp_path, filename)
+        skipped_duplicates = 0
+
+        # ===== PHASE 1: Create pending documents for all files =====
+        # This makes ALL documents visible in the UI immediately with pending status
        for file in files:
            try:
-                # Save file to a temporary location to avoid stream issues
                import os
                import tempfile

-                # Create temp file
+                # Save file to temp location
                with tempfile.NamedTemporaryFile(
-                    delete=False, suffix=os.path.splitext(file.filename)[1]
+                    delete=False, suffix=os.path.splitext(file.filename or "")[1]
                ) as temp_file:
                    temp_path = temp_file.name

-                # Write uploaded file to temp file
                content = await file.read()
                with open(temp_path, "wb") as f:
                    f.write(content)

-                from app.tasks.celery_tasks.document_tasks import (
-                    process_file_upload_task,
+                file_size = len(content)
+
+                # Generate unique identifier for deduplication check
+                unique_identifier_hash = generate_unique_identifier_hash(
+                    DocumentType.FILE, file.filename or "unknown", search_space_id
                )

-                process_file_upload_task.delay(
-                    temp_path, file.filename, search_space_id, str(user.id)
+                # Check if document already exists (by unique identifier)
+                existing = await check_document_by_unique_identifier(
+                    session, unique_identifier_hash
                )
+                if existing:
+                    # Clean up temp file for duplicates
+                    os.unlink(temp_path)
+                    skipped_duplicates += 1
+                    continue
+
+                # Create pending document (visible immediately in UI via ElectricSQL)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=file.filename or "Uploaded File",
+                    document_type=DocumentType.FILE,
+                    document_metadata={
+                        "FILE_NAME": file.filename,
+                        "file_size": file_size,
+                        "upload_time": datetime.now().isoformat(),
+                    },
+                    content="Processing...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary, updated when ready
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    status=DocumentStatus.pending(),  # Shows "pending" in UI
+                    updated_at=get_current_timestamp(),
+                    created_by_id=str(user.id),
+                )
+                session.add(document)
+                created_documents.append(document)
+                files_to_process.append(
+                    (document, temp_path, file.filename or "unknown")
+                )
+
            except Exception as e:
                raise HTTPException(
                    status_code=422,
                    detail=f"Failed to process file {file.filename}: {e!s}",
                ) from e

-        await session.commit()
-        return {"message": "Files uploaded for processing"}
+        # Commit all pending documents - they appear in UI immediately via ElectricSQL
+        if created_documents:
+            await session.commit()
+            # Refresh to get generated IDs
+            for doc in created_documents:
+                await session.refresh(doc)
+
+        # ===== PHASE 2: Dispatch Celery tasks for each file =====
+        # Each task will update document status: pending → processing → ready/failed
+        from app.tasks.celery_tasks.document_tasks import (
+            process_file_upload_with_document_task,
+        )
+
+        for document, temp_path, filename in files_to_process:
+            process_file_upload_with_document_task.delay(
+                document_id=document.id,
+                temp_path=temp_path,
+                filename=filename,
+                search_space_id=search_space_id,
+                user_id=str(user.id),
+            )
+
+        return {
+            "message": "Files uploaded for processing",
+            "document_ids": [doc.id for doc in created_documents],
+            "total_files": len(files),
+            "pending_files": len(files_to_process),
+            "skipped_duplicates": skipped_duplicates,
+        }
    except HTTPException:
        raise
    except Exception as e:
@ -211,7 +293,11 @@ async def read_documents(
                Permission.DOCUMENTS_READ.value,
                "You don't have permission to read documents in this search space",
            )
-            query = select(Document).filter(Document.search_space_id == search_space_id)
+            query = (
+                select(Document)
+                .options(selectinload(Document.created_by))
+                .filter(Document.search_space_id == search_space_id)
+            )
            count_query = (
                select(func.count())
                .select_from(Document)
@ -221,6 +307,7 @@ async def read_documents(
            # Get documents from all search spaces user has membership in
            query = (
                select(Document)
+                .options(selectinload(Document.created_by))
                .join(SearchSpace)
                .join(SearchSpaceMembership)
                .filter(SearchSpaceMembership.user_id == user.id)
@ -261,6 +348,19 @@ async def read_documents(
        # Convert database objects to API-friendly format
        api_documents = []
        for doc in db_documents:
+            # Get user name (display_name or email fallback)
+            created_by_name = None
+            if doc.created_by:
+                created_by_name = doc.created_by.display_name or doc.created_by.email
+
+            # Parse status from JSONB
+            status_data = None
+            if hasattr(doc, "status") and doc.status:
+                status_data = DocumentStatusSchema(
+                    state=doc.status.get("state", "ready"),
+                    reason=doc.status.get("reason"),
+                )
+
            api_documents.append(
                DocumentRead(
                    id=doc.id,
@ -273,6 +373,9 @@ async def read_documents(
                    created_at=doc.created_at,
                    updated_at=doc.updated_at,
                    search_space_id=doc.search_space_id,
+                    created_by_id=doc.created_by_id,
+                    created_by_name=created_by_name,
+                    status=status_data,
                )
            )

@ -341,7 +444,11 @@ async def search_documents(
                Permission.DOCUMENTS_READ.value,
                "You don't have permission to read documents in this search space",
            )
-            query = select(Document).filter(Document.search_space_id == search_space_id)
+            query = (
+                select(Document)
+                .options(selectinload(Document.created_by))
+                .filter(Document.search_space_id == search_space_id)
+            )
            count_query = (
                select(func.count())
                .select_from(Document)
@ -351,6 +458,7 @@ async def search_documents(
            # Get documents from all search spaces user has membership in
            query = (
                select(Document)
+                .options(selectinload(Document.created_by))
                .join(SearchSpace)
                .join(SearchSpaceMembership)
                .filter(SearchSpaceMembership.user_id == user.id)
@ -395,6 +503,19 @@ async def search_documents(
        # Convert database objects to API-friendly format
        api_documents = []
        for doc in db_documents:
+            # Get user name (display_name or email fallback)
+            created_by_name = None
+            if doc.created_by:
+                created_by_name = doc.created_by.display_name or doc.created_by.email
+
+            # Parse status from JSONB
+            status_data = None
+            if hasattr(doc, "status") and doc.status:
+                status_data = DocumentStatusSchema(
+                    state=doc.status.get("state", "ready"),
+                    reason=doc.status.get("reason"),
+                )
+
            api_documents.append(
                DocumentRead(
                    id=doc.id,
@ -407,6 +528,9 @@ async def search_documents(
                    created_at=doc.created_at,
                    updated_at=doc.updated_at,
                    search_space_id=doc.search_space_id,
+                    created_by_id=doc.created_by_id,
+                    created_by_name=created_by_name,
+                    status=status_data,
                )
            )

@ -782,6 +906,7 @@ async def delete_document(
    """
    Delete a document.
    Requires DOCUMENTS_DELETE permission for the search space.
+    Documents in "pending" or "processing" state cannot be deleted.
    """
    try:
        result = await session.execute(
@ -794,6 +919,14 @@ async def delete_document(
                status_code=404, detail=f"Document with id {document_id} not found"
            )

+        # Check if document is pending or currently being processed
+        doc_state = document.status.get("state") if document.status else None
+        if doc_state in ("pending", "processing"):
+            raise HTTPException(
+                status_code=409,  # Conflict
+                detail="Cannot delete document while it is pending or being processed. Please wait for processing to complete.",
+            )
+
        # Check permission for the search space
        await check_permission(
            session,
--- a/surfsense_backend/app/routes/notes_routes.py
+++ b/surfsense_backend/app/routes/notes_routes.py
@ -230,6 +230,14 @@ async def delete_note(
    if not document:
        raise HTTPException(status_code=404, detail="Note not found")

+    # Check if note is pending or currently being processed
+    doc_state = document.status.get("state") if document.status else None
+    if doc_state in ("pending", "processing"):
+        raise HTTPException(
+            status_code=409,
+            detail="Cannot delete note while it is pending or being processed. Please wait for processing to complete.",
+        )
+
    # Delete document (chunks will be cascade deleted)
    await session.delete(document)
    await session.commit()
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@ -2127,6 +2127,7 @@ async def run_google_gmail_indexing(
        start_date: str | None,
        end_date: str | None,
        update_last_indexed: bool,
+        on_heartbeat_callback=None,
    ) -> tuple[int, str | None]:
        # Use a reasonable default for max_messages
        max_messages = 1000
@ -2139,6 +2140,7 @@ async def run_google_gmail_indexing(
            end_date=end_date,
            update_last_indexed=update_last_indexed,
            max_messages=max_messages,
+            on_heartbeat_callback=on_heartbeat_callback,
        )
        # index_google_gmail_messages returns (int, str) but we need (int, str | None)
        return indexed_count, error_message if error_message else None
--- a/surfsense_backend/app/schemas/init.py
+++ b/surfsense_backend/app/schemas/init.py
@ -11,6 +11,7 @@ from .documents import (
    DocumentBase,
    DocumentRead,
    DocumentsCreate,
+    DocumentStatusSchema,
    DocumentTitleRead,
    DocumentTitleSearchResponse,
    DocumentUpdate,
@ -104,6 +105,7 @@ __all__ = [
    # Document schemas
    "DocumentBase",
    "DocumentRead",
+    "DocumentStatusSchema",
    "DocumentTitleRead",
    "DocumentTitleSearchResponse",
    "DocumentUpdate",
--- a/surfsense_backend/app/schemas/documents.py
+++ b/surfsense_backend/app/schemas/documents.py
@ -41,6 +41,13 @@ class DocumentUpdate(DocumentBase):
    pass


+class DocumentStatusSchema(BaseModel):
+    """Document processing status."""
+
+    state: str  # "ready", "pending", "processing", "failed"
+    reason: str | None = None
+
+
 class DocumentRead(BaseModel):
    id: int
    title: str
@ -53,6 +60,12 @@ class DocumentRead(BaseModel):
    updated_at: datetime | None
    search_space_id: int
    created_by_id: UUID | None = None  # User who created/uploaded this document
+    created_by_name: str | None = (
+        None  # Display name or email of the user who created this document
+    )
+    status: DocumentStatusSchema | None = (
+        None  # Processing status (ready, pending, processing, failed)
+    )

    model_config = ConfigDict(from_attributes=True)

--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@ -982,7 +982,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            channel_name = metadata.get("channel_name", "Unknown Channel")
            message_date = metadata.get("start_date", "")
-            title = f"Slack: {channel_name}"
+            title = channel_name
            if message_date:
                title += f" ({message_date})"
            return title
@ -1056,7 +1056,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            page_title = metadata.get("page_title", "Untitled Page")
            indexed_at = metadata.get("indexed_at", "")
-            title = f"Notion: {page_title}"
+            title = page_title
            if indexed_at:
                title += f" (indexed: {indexed_at})"
            return title
@ -1366,9 +1366,9 @@ class ConnectorService:
            issue_title = metadata.get("issue_title", "Untitled Issue")
            issue_state = metadata.get("state", "")
            title = (
-                f"Linear: {issue_identifier} - {issue_title}"
+                f"{issue_identifier} - {issue_title}"
                if issue_identifier
-                else f"Linear: {issue_title}"
+                else issue_title
            )
            if issue_state:
                title += f" ({issue_state})"
@ -1465,11 +1465,7 @@ class ConnectorService:
            issue_key = metadata.get("issue_key", "")
            issue_title = metadata.get("issue_title", "Untitled Issue")
            status = metadata.get("status", "")
-            title = (
-                f"Jira: {issue_key} - {issue_title}"
-                if issue_key
-                else f"Jira: {issue_title}"
-            )
+            title = f"{issue_key} - {issue_title}" if issue_key else issue_title
            if status:
                title += f" ({status})"
            return title
@ -1570,7 +1566,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            event_summary = metadata.get("event_summary", "Untitled Event")
            start_time = metadata.get("start_time", "")
-            title = f"Calendar: {event_summary}"
+            title = event_summary
            if start_time:
                title += f" ({start_time})"
            return title
@ -1675,7 +1671,7 @@ class ConnectorService:

        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            record_id = metadata.get("record_id", "")
-            return f"Airtable Record: {record_id}" if record_id else "Airtable Record"
+            return record_id if record_id else "Airtable Record"

        def _description_fn(
            _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
@ -1952,7 +1948,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            page_title = metadata.get("page_title", "Untitled Page")
            space_key = metadata.get("space_key", "")
-            title = f"Confluence: {page_title}"
+            title = page_title
            if space_key:
                title += f" ({space_key})"
            return title
@ -2238,7 +2234,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            channel_name = metadata.get("channel_name", "Unknown Channel")
            message_date = metadata.get("start_date", "")
-            title = f"Discord: {channel_name}"
+            title = channel_name
            if message_date:
                title += f" ({message_date})"
            return title
@ -2314,7 +2310,7 @@ class ConnectorService:
            team_name = metadata.get("team_name", "Unknown Team")
            channel_name = metadata.get("channel_name", "Unknown Channel")
            message_date = metadata.get("start_date", "")
-            title = f"Teams: {team_name} - {channel_name}"
+            title = f"{team_name} - {channel_name}"
            if message_date:
                title += f" ({message_date})"
            return title
@ -2387,11 +2383,7 @@ class ConnectorService:
        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            event_name = metadata.get("event_name", "Untitled Event")
            start_time = metadata.get("start_time", "")
-            return (
-                f"Luma: {event_name} ({start_time})"
-                if start_time
-                else f"Luma: {event_name}"
-            )
+            return f"{event_name} ({start_time})" if start_time else event_name

        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            return metadata.get("event_url", "") or ""
@ -2651,7 +2643,7 @@ class ConnectorService:

        def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            page_name = metadata.get("page_name", "Untitled Page")
-            return f"BookStack: {page_name}"
+            return page_name

        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
            page_slug = metadata.get("page_slug", "")
--- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
@ -537,6 +537,304 @@ async def _process_file_upload(
            raise


+@celery_app.task(name="process_file_upload_with_document", bind=True)
+def process_file_upload_with_document_task(
+    self,
+    document_id: int,
+    temp_path: str,
+    filename: str,
+    search_space_id: int,
+    user_id: str,
+):
+    """
+    Celery task to process uploaded file with existing pending document.
+
+    This task is used by the 2-phase document upload flow:
+    - Phase 1 (API): Creates pending document (visible in UI immediately)
+    - Phase 2 (this task): Updates document status: pending → processing → ready/failed
+
+    Args:
+        document_id: ID of the pending document created in Phase 1
+        temp_path: Path to the uploaded file
+        filename: Original filename
+        search_space_id: ID of the search space
+        user_id: ID of the user
+    """
+    import asyncio
+    import os
+    import traceback
+
+    logger.info(
+        f"[process_file_upload_with_document] Task started - document_id: {document_id}, "
+        f"file: {filename}, search_space_id: {search_space_id}"
+    )
+
+    # Check that the temp file still exists before starting any processing
+    if not os.path.exists(temp_path):
+        logger.error(
+            f"[process_file_upload_with_document] File does not exist: {temp_path}. "
+            "The temp file may have been cleaned up before the task ran."
+        )
+        # Mark document as failed since file is missing
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        try:
+            loop.run_until_complete(
+                _mark_document_failed(
+                    document_id,
+                    "File not found - temp file may have been cleaned up",
+                )
+            )
+        finally:
+            loop.close()
+        return
+
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+    try:
+        loop.run_until_complete(
+            _process_file_with_document(
+                document_id, temp_path, filename, search_space_id, user_id
+            )
+        )
+        logger.info(
+            f"[process_file_upload_with_document] Task completed successfully for: {filename}"
+        )
+    except Exception as e:
+        logger.error(
+            f"[process_file_upload_with_document] Task failed for {filename}: {e}\n"
+            f"Traceback:\n{traceback.format_exc()}"
+        )
+        raise
+    finally:
+        loop.close()
+
+
+async def _mark_document_failed(document_id: int, reason: str):
+    """Mark a document as failed when task cannot proceed."""
+    from app.db import Document, DocumentStatus
+    from app.tasks.document_processors.base import get_current_timestamp
+
+    async with get_celery_session_maker()() as session:
+        document = await session.get(Document, document_id)
+        if document:
+            document.status = DocumentStatus.failed(reason)
+            document.updated_at = get_current_timestamp()
+            await session.commit()
+            logger.info(f"Marked document {document_id} as failed: {reason}")
+
+
+async def _process_file_with_document(
+    document_id: int,
+    temp_path: str,
+    filename: str,
+    search_space_id: int,
+    user_id: str,
+):
+    """
+    Process file and update existing pending document status.
+
+    This function implements Phase 2 of the 2-phase document upload:
+    - Sets document status to 'processing' (shows spinner in UI)
+    - Processes the file (parsing, embedding, chunking)
+    - Updates document to 'ready' on success or 'failed' on error
+    """
+    import os
+
+    from app.db import Document, DocumentStatus
+    from app.tasks.document_processors.base import get_current_timestamp
+    from app.tasks.document_processors.file_processors import (
+        process_file_in_background_with_document,
+    )
+
+    logger.info(
+        f"[_process_file_with_document] Starting async processing for: {filename}"
+    )
+
+    async with get_celery_session_maker()() as session:
+        logger.info(
+            f"[_process_file_with_document] Database session created for: {filename}"
+        )
+        task_logger = TaskLoggingService(session, search_space_id)
+
+        # Get the document
+        document = await session.get(Document, document_id)
+        if not document:
+            logger.error(f"Document {document_id} not found")
+            return
+
+        # Get file size for notification metadata
+        try:
+            file_size = os.path.getsize(temp_path)
+            logger.info(f"[_process_file_with_document] File size: {file_size} bytes")
+        except Exception as e:
+            logger.warning(
+                f"[_process_file_with_document] Could not get file size: {e}"
+            )
+            file_size = None
+
+        # Create notification for document processing
+        logger.info(
+            f"[_process_file_with_document] Creating notification for: {filename}"
+        )
+        notification = (
+            await NotificationService.document_processing.notify_processing_started(
+                session=session,
+                user_id=UUID(user_id),
+                document_type="FILE",
+                document_name=filename,
+                search_space_id=search_space_id,
+                file_size=file_size,
+            )
+        )
+
+        log_entry = await task_logger.log_task_start(
+            task_name="process_file_upload_with_document",
+            source="document_processor",
+            message=f"Starting file processing for: {filename} (document_id: {document_id})",
+            metadata={
+                "document_type": "FILE",
+                "document_id": document_id,
+                "filename": filename,
+                "file_path": temp_path,
+                "user_id": user_id,
+            },
+        )
+
+        try:
+            # Set status to PROCESSING (shows spinner in UI via ElectricSQL)
+            document.status = DocumentStatus.processing()
+            await session.commit()
+            logger.info(
+                f"[_process_file_with_document] Document {document_id} status set to 'processing'"
+            )
+
+            # Process the file and update document
+            result = await process_file_in_background_with_document(
+                document=document,
+                file_path=temp_path,
+                filename=filename,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                session=session,
+                task_logger=task_logger,
+                log_entry=log_entry,
+                notification=notification,
+            )
+
+            # Update notification on success
+            if result:
+                await (
+                    NotificationService.document_processing.notify_processing_completed(
+                        session=session,
+                        notification=notification,
+                        document_id=result.id,
+                        chunks_count=None,
+                    )
+                )
+                logger.info(
+                    f"[_process_file_with_document] Successfully processed document {document_id}"
+                )
+            else:
+                # Duplicate detected - mark as failed
+                document.status = DocumentStatus.failed("Duplicate content detected")
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+                await (
+                    NotificationService.document_processing.notify_processing_completed(
+                        session=session,
+                        notification=notification,
+                        error_message="Document already exists (duplicate)",
+                    )
+                )
+
+        except Exception as e:
+            # Import here to avoid circular dependencies
+            from fastapi import HTTPException
+
+            from app.services.page_limit_service import PageLimitExceededError
+
+            # Check if this is a page limit error
+            page_limit_error: PageLimitExceededError | None = None
+            if isinstance(e, PageLimitExceededError):
+                page_limit_error = e
+            elif (
+                isinstance(e, HTTPException)
+                and e.__cause__
+                and isinstance(e.__cause__, PageLimitExceededError)
+            ):
+                page_limit_error = e.__cause__
+
+            # Mark document as failed (shows error in UI via ElectricSQL)
+            error_message = str(e)[:500]
+            document.status = DocumentStatus.failed(error_message)
+            document.updated_at = get_current_timestamp()
+            await session.commit()
+            logger.info(
+                f"[_process_file_with_document] Document {document_id} marked as failed: {error_message[:100]}"
+            )
+
+            # Handle page limit errors with dedicated notification
+            if page_limit_error is not None:
+                try:
+                    await session.refresh(notification)
+                    await NotificationService.document_processing.notify_processing_completed(
+                        session=session,
+                        notification=notification,
+                        error_message="Page limit exceeded",
+                    )
+                    await NotificationService.page_limit.notify_page_limit_exceeded(
+                        session=session,
+                        user_id=UUID(user_id),
+                        document_name=filename,
+                        document_type="FILE",
+                        search_space_id=search_space_id,
+                        pages_used=page_limit_error.pages_used,
+                        pages_limit=page_limit_error.pages_limit,
+                        pages_to_add=page_limit_error.pages_to_add,
+                    )
+                except Exception as notif_error:
+                    logger.error(
+                        f"Failed to create page limit notification: {notif_error!s}"
+                    )
+            else:
+                # Update notification on failure
+                try:
+                    await session.refresh(notification)
+                    await NotificationService.document_processing.notify_processing_completed(
+                        session=session,
+                        notification=notification,
+                        error_message=str(e)[:100],
+                    )
+                except Exception as notif_error:
+                    logger.error(
+                        f"Failed to update notification on failure: {notif_error!s}"
+                    )
+
+            await task_logger.log_task_failure(
+                log_entry,
+                error_message[:100],
+                str(e),
+                {"error_type": type(e).__name__, "document_id": document_id},
+            )
+            logger.error(f"Error processing file {filename}: {e!s}")
+            raise
+
+        finally:
+            # Clean up temp file
+            if os.path.exists(temp_path):
+                try:
+                    os.unlink(temp_path)
+                    logger.info(
+                        f"[_process_file_with_document] Cleaned up temp file: {temp_path}"
+                    )
+                except Exception as cleanup_error:
+                    logger.warning(
+                        f"[_process_file_with_document] Failed to clean up temp file: {cleanup_error}"
+                    )
+
+
@celery_app.task(name="process_circleback_meeting", bind=True)
 def process_circleback_meeting_task(
    self,
--- a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py
+++ b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py
@ -4,33 +4,41 @@ This task runs periodically (every 5 minutes by default) to find notifications
 that are stuck in "in_progress" status but don't have an active Redis heartbeat key.
 These are marked as "failed" to prevent the frontend from showing a perpetual "syncing" state.

+Additionally, it cleans up documents stuck in pending/processing state that belong
+to connectors with stale notifications.
+
 Detection mechanism:
 - Active indexing tasks set a Redis key with TTL (2 minutes) as a heartbeat
 - If the task crashes, the Redis key expires automatically
 - This cleanup task checks for in-progress notifications without a Redis heartbeat key
 - Such notifications are marked as failed with O(1) batch UPDATE
+- Documents with pending/processing status for those connectors are also marked as failed
 """

+import contextlib
 import json
 import logging
 import os
 from datetime import UTC, datetime

 import redis
-from sqlalchemy import and_, text
+from sqlalchemy import and_, or_, text
 from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
 from sqlalchemy.future import select
 from sqlalchemy.pool import NullPool

 from app.celery_app import celery_app
 from app.config import config
-from app.db import Notification
+from app.db import Document, DocumentStatus, Notification

 logger = logging.getLogger(__name__)

 # Redis client for checking heartbeats
 _redis_client: redis.Redis | None = None

+# Error message shown to users when sync is interrupted
+STALE_SYNC_ERROR_MESSAGE = "Sync was interrupted unexpectedly. Please retry."
+

 def get_redis_client() -> redis.Redis:
    """Get or create Redis client for heartbeat checking."""
@ -70,6 +78,7 @@ def cleanup_stale_indexing_notifications_task():
    - Do NOT have a corresponding Redis heartbeat key (meaning task crashed)

    And marks them as failed with O(1) batch UPDATE.
+    Also marks associated pending/processing documents as failed.
    """
    import asyncio

@ -86,15 +95,20 @@ async def _cleanup_stale_notifications():
    """Find and mark stale connector indexing notifications as failed.

    Uses Redis TTL-based detection:
-    1. Find all in-progress notifications
+    1. Find all in-progress notifications with their connector_id
    2. Check which ones are missing their Redis heartbeat key
    3. Mark those as failed with O(1) batch UPDATE using JSONB || operator
+    4. Mark associated documents (pending/processing) as failed
    """
    async with get_celery_session_maker()() as session:
        try:
            # Find all in-progress connector indexing notifications
+            # Fetch full metadata to properly extract connector_id
            result = await session.execute(
-                select(Notification.id).where(
+                select(
+                    Notification.id,
+                    Notification.notification_metadata,
+                ).where(
                    and_(
                        Notification.type == "connector_indexing",
                        Notification.notification_metadata["status"].astext
@ -102,24 +116,37 @@ async def _cleanup_stale_notifications():
                    )
                )
            )
-            in_progress_ids = [row[0] for row in result.fetchall()]
+            in_progress_rows = result.fetchall()

-            if not in_progress_ids:
+            if not in_progress_rows:
                logger.debug("No in-progress connector indexing notifications found")
                return

            # Check which ones are missing heartbeat keys in Redis
            redis_client = get_redis_client()
            stale_notification_ids = []
+            stale_connector_ids = []

-            for notification_id in in_progress_ids:
+            for row in in_progress_rows:
+                notification_id = row[0]
+                metadata = row[1]  # Full metadata dict
                heartbeat_key = _get_heartbeat_key(notification_id)
                if not redis_client.exists(heartbeat_key):
                    stale_notification_ids.append(notification_id)
+                    # Extract connector_id from metadata dict for document cleanup
+                    if metadata and isinstance(metadata, dict):
+                        connector_id = metadata.get("connector_id")
+                        logger.debug(
+                            f"Notification {notification_id} metadata: {metadata}, "
+                            f"connector_id: {connector_id}"
+                        )
+                        if connector_id is not None:
+                            with contextlib.suppress(ValueError, TypeError):
+                                stale_connector_ids.append(int(connector_id))

            if not stale_notification_ids:
                logger.debug(
-                    f"All {len(in_progress_ids)} in-progress notifications have active Redis heartbeats"
+                    f"All {len(in_progress_rows)} in-progress notifications have active Redis heartbeats"
                )
                return

@ -127,18 +154,15 @@ async def _cleanup_stale_notifications():
                f"Found {len(stale_notification_ids)} stale connector indexing notifications "
                f"(no Redis heartbeat key): {stale_notification_ids}"
            )
+            logger.info(f"Connector IDs for document cleanup: {stale_connector_ids}")

-            # O(1) Batch UPDATE using JSONB || operator
+            # O(1) Batch UPDATE notifications using JSONB || operator
            # This merges the update data into existing notification_metadata
            # Also updates title and message for proper UI display
-            error_message = (
-                "Something went wrong while syncing your content. Please retry."
-            )
-
            update_data = {
                "status": "failed",
                "completed_at": datetime.now(UTC).isoformat(),
-                "error_message": error_message,
+                "error_message": STALE_SYNC_ERROR_MESSAGE,
                "sync_stage": "failed",
            }

@ -152,16 +176,96 @@ async def _cleanup_stale_notifications():
                """),
                {
                    "update_json": json.dumps(update_data),
-                    "display_message": f"{error_message}",
+                    "display_message": STALE_SYNC_ERROR_MESSAGE,
                    "ids": stale_notification_ids,
                },
            )

-            await session.commit()
            logger.info(
-                f"Successfully marked {len(stale_notification_ids)} stale notifications as failed (batch UPDATE)"
+                f"Successfully marked {len(stale_notification_ids)} stale notifications as failed"
            )

+            # ===== Clean up stuck documents for stale connectors =====
+            if stale_connector_ids:
+                await _cleanup_stuck_documents(session, stale_connector_ids)
+
+            await session.commit()
+
        except Exception as e:
            logger.error(f"Error cleaning up stale notifications: {e!s}", exc_info=True)
            await session.rollback()
+
+
+async def _cleanup_stuck_documents(session, connector_ids: list[int]) -> None:
+    """
+    Mark documents stuck in pending/processing state as failed for given connectors.
+
+    This ensures that when a connector sync is interrupted, all partially-processed
+    documents are marked with a clear error state instead of being stuck indefinitely.
+
+    The work runs inside a SAVEPOINT (``session.begin_nested()``). On PostgreSQL a
+    failed statement aborts the whole enclosing transaction, so without the
+    savepoint an error here would make the caller's later ``commit()`` silently
+    discard the notification updates it issued before calling this function.
+    Nothing is committed here; the caller owns the outer transaction.
+
+    Errors are logged and swallowed on purpose (best-effort cleanup).
+
+    Args:
+        session: Async database session with the caller's transaction in progress
+        connector_ids: List of connector IDs whose documents should be cleaned up
+    """
+    if not connector_ids:
+        return
+
+    # Several stale notifications can reference the same connector
+    unique_ids = sorted(set(connector_ids))
+
+    try:
+        async with session.begin_nested():
+            # Collect the affected document IDs first (used for logging only)
+            count_result = await session.execute(
+                select(Document.id).where(
+                    and_(
+                        Document.connector_id.in_(unique_ids),
+                        or_(
+                            Document.status["state"].astext == DocumentStatus.PENDING,
+                            Document.status["state"].astext
+                            == DocumentStatus.PROCESSING,
+                        ),
+                    )
+                )
+            )
+            stuck_doc_ids = [row[0] for row in count_result.fetchall()]
+
+            if not stuck_doc_ids:
+                logger.debug(
+                    f"No stuck documents found for connector IDs: {unique_ids}"
+                )
+                return
+
+            logger.warning(
+                f"Found {len(stuck_doc_ids)} stuck documents (pending/processing) "
+                f"for connector IDs {unique_ids}: {stuck_doc_ids[:20]}..."  # Log first 20
+            )
+
+            # Single batch UPDATE: mark all stuck documents as failed using JSONB
+            # The error message matches what we show in notifications
+            failed_status = DocumentStatus.failed(STALE_SYNC_ERROR_MESSAGE)
+
+            await session.execute(
+                text("""
+                    UPDATE documents
+                    SET status = CAST(:failed_status AS jsonb),
+                        updated_at = :now
+                    WHERE connector_id = ANY(:connector_ids)
+                      AND (
+                          status->>'state' = :pending_state
+                          OR status->>'state' = :processing_state
+                      )
+                """),
+                {
+                    "failed_status": json.dumps(failed_status),
+                    "now": datetime.now(UTC),
+                    "connector_ids": unique_ids,
+                    "pending_state": DocumentStatus.PENDING,
+                    "processing_state": DocumentStatus.PROCESSING,
+                },
+            )
+
+        # Reached only after the savepoint was released successfully
+        logger.info(
+            f"Successfully marked {len(stuck_doc_ids)} stuck documents as failed "
+            f"for connector IDs: {unique_ids}"
+        )
+
+    except Exception as e:
+        logger.error(
+            f"Error cleaning up stuck documents for connectors {unique_ids}: {e!s}",
+            exc_info=True,
+        )
+        # Don't raise - the savepoint was rolled back, so the caller's transaction is
+        # still usable and the notification cleanup can be committed
--- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
@ -1,5 +1,9 @@
 """
 Airtable connector indexer.
+
+Implements real-time document status updates using a two-phase approach:
+- Phase 1: Create all documents with PENDING status (visible in UI immediately)
+- Phase 2: Process each document one by one (pending → processing → ready/failed)
 """

 import time
@ -10,7 +14,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.airtable_history import AirtableHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -27,6 +31,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -134,24 +139,32 @@ async def index_airtable_records(
                await task_logger.log_task_success(
                    log_entry, success_msg, {"bases_count": 0}
                )
-                return 0, success_msg
+                # CRITICAL: Update timestamp even when no bases found so Electric SQL syncs
+                await update_connector_last_indexed(
+                    session, connector, update_last_indexed
+                )
+                await session.commit()
+                return 0, None  # Return None (not error) when no items found

            logger.info(f"Found {len(bases)} Airtable bases to process")

            # Heartbeat tracking - update notification periodically to prevent appearing stuck
            last_heartbeat_time = time.time()
-            total_documents_indexed = 0

-            # Process each base
+            # Track overall statistics
+            documents_indexed = 0
+            documents_skipped = 0
+            documents_failed = 0
+            duplicate_content_count = 0
+
+            # =======================================================================
+            # PHASE 1: Collect all records and create pending documents
+            # This makes ALL documents visible in the UI immediately with pending status
+            # =======================================================================
+            records_to_process = []  # List of dicts with document and record data
+            new_documents_created = False
+
            for base in bases:
-                # Check if it's time for a heartbeat update
-                if (
-                    on_heartbeat_callback
-                    and (time.time() - last_heartbeat_time)
-                    >= HEARTBEAT_INTERVAL_SECONDS
-                ):
-                    await on_heartbeat_callback(total_documents_indexed)
-                    last_heartbeat_time = time.time()
                base_id = base.get("id")
                base_name = base.get("name", "Unknown Base")

@ -201,7 +214,6 @@ async def index_airtable_records(
                                max_records=max_records,
                            )
                        )
-
                    else:
                        # Fetch all records
                        records, records_error = airtable_connector.get_all_records(
@ -222,21 +234,14 @@ async def index_airtable_records(

                    logger.info(f"Found {len(records)} records in table {table_name}")

-                    documents_indexed = 0
-                    skipped_messages = []
-                    documents_skipped = 0
-                    # Process each record
+                    # Phase 1: Analyze each record and create pending documents
                    for record in records:
-                        # Check if it's time for a heartbeat update
-                        if (
-                            on_heartbeat_callback
-                            and (time.time() - last_heartbeat_time)
-                            >= HEARTBEAT_INTERVAL_SECONDS
-                        ):
-                            await on_heartbeat_callback(total_documents_indexed)
-                            last_heartbeat_time = time.time()
-
                        try:
+                            record_id = record.get("id", "")
+                            if not record_id:
+                                documents_skipped += 1
+                                continue
+
                            # Generate markdown content
                            markdown_content = (
                                airtable_connector.format_record_to_markdown(
@ -246,16 +251,11 @@ async def index_airtable_records(

                            if not markdown_content.strip():
                                logger.warning(
-                                    f"Skipping message with no content: {record.get('id')}"
-                                )
-                                skipped_messages.append(
-                                    f"{record.get('id')} (no content)"
+                                    f"Skipping record with no content: {record_id}"
                                )
                                documents_skipped += 1
                                continue

-                            record_id = record.get("id", "Unknown")
-
                            # Generate unique identifier hash for this Airtable record
                            unique_identifier_hash = generate_unique_identifier_hash(
                                DocumentType.AIRTABLE_CONNECTOR,
@ -278,77 +278,30 @@ async def index_airtable_records(
                            if existing_document:
                                # Document exists - check if content has changed
                                if existing_document.content_hash == content_hash:
-                                    logger.info(
-                                        f"Document for Airtable record {record_id} unchanged. Skipping."
-                                    )
+                                    # Ensure status is ready (might have been stuck in processing/pending)
+                                    if not DocumentStatus.is_state(
+                                        existing_document.status, DocumentStatus.READY
+                                    ):
+                                        existing_document.status = (
+                                            DocumentStatus.ready()
+                                        )
                                    documents_skipped += 1
                                    continue
-                                else:
-                                    # Content has changed - update the existing document
-                                    logger.info(
-                                        f"Content changed for Airtable record {record_id}. Updating document."
-                                    )

-                                    # Generate document summary
-                                    user_llm = await get_user_long_context_llm(
-                                        session, user_id, search_space_id
-                                    )
-
-                                    if user_llm:
-                                        document_metadata = {
-                                            "record_id": record_id,
-                                            "created_time": record.get(
-                                                "CREATED_TIME()", ""
-                                            ),
-                                            "document_type": "Airtable Record",
-                                            "connector_type": "Airtable",
-                                        }
-                                        (
-                                            summary_content,
-                                            summary_embedding,
-                                        ) = await generate_document_summary(
-                                            markdown_content,
-                                            user_llm,
-                                            document_metadata,
-                                        )
-                                    else:
-                                        summary_content = (
-                                            f"Airtable Record: {record_id}\n\n"
-                                        )
-                                        summary_embedding = (
-                                            config.embedding_model_instance.embed(
-                                                summary_content
-                                            )
-                                        )
-
-                                    # Process chunks
-                                    chunks = await create_document_chunks(
-                                        markdown_content
-                                    )
-
-                                    # Update existing document
-                                    existing_document.title = (
-                                        f"Airtable Record: {record_id}"
-                                    )
-                                    existing_document.content = summary_content
-                                    existing_document.content_hash = content_hash
-                                    existing_document.embedding = summary_embedding
-                                    existing_document.document_metadata = {
+                                # Queue existing document for update (will be set to processing in Phase 2)
+                                records_to_process.append(
+                                    {
+                                        "document": existing_document,
+                                        "is_new": False,
+                                        "markdown_content": markdown_content,
+                                        "content_hash": content_hash,
                                        "record_id": record_id,
-                                        "created_time": record.get(
-                                            "CREATED_TIME()", ""
-                                        ),
+                                        "record": record,
+                                        "base_name": base_name,
+                                        "table_name": table_name,
                                    }
-                                    existing_document.chunks = chunks
-                                    existing_document.updated_at = (
-                                        get_current_timestamp()
-                                    )
-
-                                    documents_indexed += 1
-                                    logger.info(
-                                        f"Successfully updated Airtable record {record_id}"
-                                    )
-                                    continue
+                                )
+                                continue

                            # Document doesn't exist by unique_identifier_hash
                            # Check if a document with the same content_hash exists (from another connector)
@ -365,123 +318,210 @@ async def index_airtable_records(
                                    f"(existing document ID: {duplicate_by_content.id}, "
                                    f"type: {duplicate_by_content.document_type}). Skipping."
                                )
+                                duplicate_content_count += 1
                                documents_skipped += 1
                                continue

-                            # Document doesn't exist - create new one
-                            # Generate document summary
-                            user_llm = await get_user_long_context_llm(
-                                session, user_id, search_space_id
-                            )
-
-                            if user_llm:
-                                document_metadata = {
-                                    "record_id": record_id,
-                                    "created_time": record.get("CREATED_TIME()", ""),
-                                    "document_type": "Airtable Record",
-                                    "connector_type": "Airtable",
-                                }
-                                (
-                                    summary_content,
-                                    summary_embedding,
-                                ) = await generate_document_summary(
-                                    markdown_content, user_llm, document_metadata
-                                )
-                            else:
-                                # Fallback to simple summary if no LLM configured
-                                summary_content = f"Airtable Record: {record_id}\n\n"
-                                summary_embedding = (
-                                    config.embedding_model_instance.embed(
-                                        summary_content
-                                    )
-                                )
-
-                            # Process chunks
-                            chunks = await create_document_chunks(markdown_content)
-
-                            # Create and store new document
-                            logger.info(
-                                f"Creating new document for Airtable record: {record_id}"
-                            )
+                            # Create new document with PENDING status (visible in UI immediately)
                            document = Document(
                                search_space_id=search_space_id,
-                                title=f"Airtable Record: {record_id}",
+                                title=record_id,
                                document_type=DocumentType.AIRTABLE_CONNECTOR,
                                document_metadata={
                                    "record_id": record_id,
                                    "created_time": record.get("CREATED_TIME()", ""),
+                                    "base_name": base_name,
+                                    "table_name": table_name,
+                                    "connector_id": connector_id,
                                },
-                                content=summary_content,
-                                content_hash=content_hash,
+                                content="Pending...",  # Placeholder until processed
+                                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                                unique_identifier_hash=unique_identifier_hash,
-                                embedding=summary_embedding,
-                                chunks=chunks,
+                                embedding=None,
+                                chunks=[],  # Empty at creation - safe for async
+                                status=DocumentStatus.pending(),  # Pending until processing starts
                                updated_at=get_current_timestamp(),
                                created_by_id=user_id,
                                connector_id=connector_id,
                            )
-
                            session.add(document)
-                            documents_indexed += 1
-                            logger.info(
-                                f"Successfully indexed new Airtable record {summary_content}"
-                            )
+                            new_documents_created = True

-                            # Batch commit every 10 documents
-                            if documents_indexed % 10 == 0:
-                                logger.info(
-                                    f"Committing batch: {documents_indexed} Airtable records processed so far"
-                                )
-                                await session.commit()
+                            records_to_process.append(
+                                {
+                                    "document": document,
+                                    "is_new": True,
+                                    "markdown_content": markdown_content,
+                                    "content_hash": content_hash,
+                                    "record_id": record_id,
+                                    "record": record,
+                                    "base_name": base_name,
+                                    "table_name": table_name,
+                                }
+                            )

                        except Exception as e:
                            logger.error(
-                                f"Error processing the Airtable record {record.get('id', 'Unknown')}: {e!s}",
-                                exc_info=True,
+                                f"Error in Phase 1 for record: {e!s}", exc_info=True
                            )
-                            skipped_messages.append(
-                                f"{record.get('id', 'Unknown')} (processing error)"
-                            )
-                            documents_skipped += 1
-                            continue  # Skip this message and continue with others
+                            documents_failed += 1
+                            continue

-                    # Accumulate total processed across all tables
-                    total_processed += documents_indexed
+            # Commit all pending documents - they all appear in UI now
+            if new_documents_created:
+                logger.info(
+                    f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents"
+                )
+                await session.commit()

-                    # Final commit for any remaining documents not yet committed in batches
-                    if documents_indexed > 0:
+            # =======================================================================
+            # PHASE 2: Process each document one by one
+            # Each document transitions: pending → processing → ready/failed
+            # =======================================================================
+            logger.info(f"Phase 2: Processing {len(records_to_process)} documents")
+
+            for item in records_to_process:
+                # Send heartbeat periodically
+                if on_heartbeat_callback:
+                    current_time = time.time()
+                    if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                        await on_heartbeat_callback(documents_indexed)
+                        last_heartbeat_time = current_time
+
+                document = item["document"]
+                try:
+                    # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                    document.status = DocumentStatus.processing()
+                    await session.commit()
+
+                    # Heavy processing (LLM, embeddings, chunks)
+                    user_llm = await get_user_long_context_llm(
+                        session, user_id, search_space_id
+                    )
+
+                    if user_llm:
+                        document_metadata_for_summary = {
+                            "record_id": item["record_id"],
+                            "created_time": item["record"].get("CREATED_TIME()", ""),
+                            "document_type": "Airtable Record",
+                            "connector_type": "Airtable",
+                        }
+                        (
+                            summary_content,
+                            summary_embedding,
+                        ) = await generate_document_summary(
+                            item["markdown_content"],
+                            user_llm,
+                            document_metadata_for_summary,
+                        )
+                    else:
+                        # Fallback to simple summary if no LLM configured
+                        summary_content = f"Airtable Record: {item['record_id']}\n\n"
+                        summary_embedding = config.embedding_model_instance.embed(
+                            summary_content
+                        )
+
+                    chunks = await create_document_chunks(item["markdown_content"])
+
+                    # Update document to READY with actual content
+                    document.title = item["record_id"]
+                    document.content = summary_content
+                    document.content_hash = item["content_hash"]
+                    document.embedding = summary_embedding
+                    document.document_metadata = {
+                        "record_id": item["record_id"],
+                        "created_time": item["record"].get("CREATED_TIME()", ""),
+                        "base_name": item["base_name"],
+                        "table_name": item["table_name"],
+                        "connector_id": connector_id,
+                    }
+                    safe_set_chunks(document, chunks)
+                    document.updated_at = get_current_timestamp()
+                    document.status = DocumentStatus.ready()
+
+                    documents_indexed += 1
+
+                    # Batch commit every 10 documents (for ready status updates)
+                    if documents_indexed % 10 == 0:
                        logger.info(
-                            f"Final commit for table {table_name}: {documents_indexed} Airtable records processed"
+                            f"Committing batch: {documents_indexed} Airtable records processed so far"
                        )
                        await session.commit()
-                        logger.info(
-                            f"Successfully committed all Airtable document changes for table {table_name}"
-                        )

-            # Update the last_indexed_at timestamp for the connector only if requested
-            # (after all tables in all bases are processed)
-            if total_processed > 0:
-                await update_connector_last_indexed(
-                    session, connector, update_last_indexed
+                except Exception as e:
+                    logger.error(
+                        f"Error processing Airtable record: {e!s}", exc_info=True
+                    )
+                    # Mark document as failed with reason (visible in UI)
+                    try:
+                        document.status = DocumentStatus.failed(str(e))
+                        document.updated_at = get_current_timestamp()
+                    except Exception as status_error:
+                        logger.error(
+                            f"Failed to update document status to failed: {status_error}"
+                        )
+                    documents_failed += 1
+                    continue
+
+            # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+
+            total_processed = documents_indexed
+
+            # Final commit to ensure all documents are persisted (safety net)
+            logger.info(
+                f"Final commit: Total {documents_indexed} Airtable records processed"
+            )
+            try:
+                await session.commit()
+                logger.info(
+                    "Successfully committed all Airtable document changes to database"
                )
+            except Exception as e:
+                # Handle any remaining integrity errors gracefully (race conditions, etc.)
+                if (
+                    "duplicate key value violates unique constraint" in str(e).lower()
+                    or "uniqueviolationerror" in str(e).lower()
+                ):
+                    logger.warning(
+                        f"Duplicate content_hash detected during final commit. "
+                        f"This may occur if the same record was indexed by multiple connectors. "
+                        f"Rolling back and continuing. Error: {e!s}"
+                    )
+                    await session.rollback()
+                    # Don't fail the entire task - some documents may have been successfully indexed
+                else:
+                    raise
+
+            # Build warning message if there were issues
+            warning_parts = []
+            if duplicate_content_count > 0:
+                warning_parts.append(f"{duplicate_content_count} duplicate")
+            if documents_failed > 0:
+                warning_parts.append(f"{documents_failed} failed")
+            warning_message = ", ".join(warning_parts) if warning_parts else None

            # Log success after processing all bases and tables
            await task_logger.log_task_success(
                log_entry,
                f"Successfully completed Airtable indexing for connector {connector_id}",
                {
-                    "events_processed": total_processed,
-                    "documents_indexed": total_processed,
+                    "documents_indexed": documents_indexed,
+                    "documents_skipped": documents_skipped,
+                    "documents_failed": documents_failed,
+                    "duplicate_content_count": duplicate_content_count,
                },
            )

            logger.info(
-                f"Airtable indexing completed: {total_processed} total records processed"
+                f"Airtable indexing completed: {documents_indexed} ready, "
+                f"{documents_skipped} skipped, {documents_failed} failed "
+                f"({duplicate_content_count} duplicate content)"
            )
            return (
                total_processed,
-                None,
-            )  # Return None as the error message to indicate success
+                warning_message,
+            )

        except Exception as e:
            logger.error(
--- a/surfsense_backend/app/tasks/connector_indexers/base.py
+++ b/surfsense_backend/app/tasks/connector_indexers/base.py
@ -28,6 +28,35 @@ def get_current_timestamp() -> datetime:
    return datetime.now(UTC)


+def safe_set_chunks(document: Document, chunks: list) -> None:
+    """
+    Assign chunks to a document without triggering a lazy load.
+
+    Use this instead of `document.chunks = chunks` to avoid SQLAlchemy
+    async errors (MissingGreenlet / greenlet_spawn).
+
+    Why this is needed:
+    - Direct assignment `document.chunks = chunks` makes SQLAlchemy load
+      the OLD chunks first (for comparison/orphan detection)
+    - That lazy load fails in async context with the asyncpg driver
+    - set_committed_value installs the value as already-loaded state, so
+      no load is attempted and no attribute history/events are recorded
+
+    NOTE(review): because no history is recorded, confirm that the new
+    chunks are still persisted and old ones removed on the next flush.
+
+    Args:
+        document: The Document object to update (modified in place)
+        chunks: List of Chunk objects to assign
+
+    Example:
+        safe_set_chunks(document, chunks)  # not: document.chunks = chunks
+    """
+    from sqlalchemy.orm.attributes import set_committed_value
+
+    set_committed_value(document, "chunks", chunks)
+
+
 def parse_date_flexible(date_str: str) -> datetime:
    """
    Parse date from multiple common formats.
--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@ -1,5 +1,9 @@
 """
 BookStack connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Collect all pages and create pending documents (visible in UI immediately)
+- Phase 2: Process each page: pending → processing → ready/failed
 """

 import time
@ -11,7 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.bookstack_connector import BookStackConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -28,6 +32,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -184,22 +189,22 @@ async def index_bookstack_pages(
            logger.error(f"Error fetching BookStack pages: {e!s}", exc_info=True)
            return 0, f"Error fetching BookStack pages: {e!s}"

-        # Process and index each page
+        # =======================================================================
+        # PHASE 1: Analyze all pages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
        documents_indexed = 0
        skipped_pages = []
        documents_skipped = 0
+        documents_failed = 0

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

+        pages_to_process = []  # List of dicts with document and page data
+        new_documents_created = False
+
        for page in pages:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
            try:
                page_id = page.get("id")
                page_name = page.get("name", "")
@ -218,7 +223,7 @@ async def index_bookstack_pages(

                # Fetch full page content (Markdown preferred)
                try:
-                    page_detail, page_content = bookstack_client.get_page_with_content(
+                    _, page_content = bookstack_client.get_page_with_content(
                        page_id, use_markdown=True
                    )
                except Exception as e:
@ -252,82 +257,38 @@ async def index_bookstack_pages(
                # Build page URL
                page_url = f"{bookstack_base_url}/books/{book_slug}/page/{page_slug}"

-                # Build document metadata
-                doc_metadata = {
-                    "page_id": page_id,
-                    "page_name": page_name,
-                    "page_slug": page_slug,
-                    "book_id": book_id,
-                    "book_slug": book_slug,
-                    "chapter_id": chapter_id,
-                    "base_url": bookstack_base_url,
-                    "page_url": page_url,
-                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                }
-
                if existing_document:
                    # Document exists - check if content has changed
                    if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
                        logger.info(
                            f"Document for BookStack page {page_name} unchanged. Skipping."
                        )
                        documents_skipped += 1
                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for BookStack page {page_name}. Updating document."
-                        )

-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            summary_metadata = {
-                                "page_name": page_name,
-                                "page_id": page_id,
-                                "book_id": book_id,
-                                "document_type": "BookStack Page",
-                                "connector_type": "BookStack",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                full_content, user_llm, summary_metadata
-                            )
-                        else:
-                            summary_content = (
-                                f"BookStack Page: {page_name}\n\nBook ID: {book_id}\n\n"
-                            )
-                            if page_content:
-                                content_preview = page_content[:1000]
-                                if len(page_content) > 1000:
-                                    content_preview += "..."
-                                summary_content += (
-                                    f"Content Preview: {content_preview}\n\n"
-                                )
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(full_content)
-
-                        # Update existing document
-                        existing_document.title = f"BookStack - {page_name}"
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = doc_metadata
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(f"Successfully updated BookStack page {page_name}")
-                        continue
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    pages_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "page_id": page_id,
+                            "page_name": page_name,
+                            "page_slug": page_slug,
+                            "book_id": book_id,
+                            "book_slug": book_slug,
+                            "chapter_id": chapter_id,
+                            "page_url": page_url,
+                            "page_content": page_content,
+                            "full_content": full_content,
+                            "content_hash": content_hash,
+                        }
+                    )
+                    continue

                # Document doesn't exist by unique_identifier_hash
                # Check if a document with the same content_hash exists (from another connector)
@ -345,17 +306,108 @@ async def index_bookstack_pages(
                    documents_skipped += 1
                    continue

-                # Document doesn't exist - create new one
-                # Generate summary with metadata
+                # Create new document with PENDING status (visible in UI immediately)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=page_name,
+                    document_type=DocumentType.BOOKSTACK_CONNECTOR,
+                    document_metadata={
+                        "page_id": page_id,
+                        "page_name": page_name,
+                        "page_slug": page_slug,
+                        "book_id": book_id,
+                        "book_slug": book_slug,
+                        "chapter_id": chapter_id,
+                        "base_url": bookstack_base_url,
+                        "page_url": page_url,
+                        "connector_id": connector_id,
+                    },
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
+                    updated_at=get_current_timestamp(),
+                    created_by_id=user_id,
+                    connector_id=connector_id,
+                )
+                session.add(document)
+                new_documents_created = True
+
+                pages_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "page_id": page_id,
+                        "page_name": page_name,
+                        "page_slug": page_slug,
+                        "book_id": book_id,
+                        "book_slug": book_slug,
+                        "chapter_id": chapter_id,
+                        "page_url": page_url,
+                        "page_content": page_content,
+                        "full_content": full_content,
+                        "content_hash": content_hash,
+                    }
+                )
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(pages_to_process)} documents")
+
+        for item in pages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
                user_llm = await get_user_long_context_llm(
                    session, user_id, search_space_id
                )

+                # Build document metadata
+                doc_metadata = {
+                    "page_id": item["page_id"],
+                    "page_name": item["page_name"],
+                    "page_slug": item["page_slug"],
+                    "book_id": item["book_id"],
+                    "book_slug": item["book_slug"],
+                    "chapter_id": item["chapter_id"],
+                    "base_url": bookstack_base_url,
+                    "page_url": item["page_url"],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+
                if user_llm:
                    summary_metadata = {
-                        "page_name": page_name,
-                        "page_id": page_id,
-                        "book_id": book_id,
+                        "page_name": item["page_name"],
+                        "page_id": item["page_id"],
+                        "book_id": item["book_id"],
                        "document_type": "BookStack Page",
                        "connector_type": "BookStack",
                    }
@ -363,17 +415,15 @@ async def index_bookstack_pages(
                        summary_content,
                        summary_embedding,
                    ) = await generate_document_summary(
-                        full_content, user_llm, summary_metadata
+                        item["full_content"], user_llm, summary_metadata
                    )
                else:
                    # Fallback to simple summary if no LLM configured
-                    summary_content = (
-                        f"BookStack Page: {page_name}\n\nBook ID: {book_id}\n\n"
-                    )
-                    if page_content:
+                    summary_content = f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n"
+                    if item["page_content"]:
                        # Take first 1000 characters of content for summary
-                        content_preview = page_content[:1000]
-                        if len(page_content) > 1000:
+                        content_preview = item["page_content"][:1000]
+                        if len(item["page_content"]) > 1000:
                            content_preview += "..."
                        summary_content += f"Content Preview: {content_preview}\n\n"
                    summary_embedding = config.embedding_model_instance.embed(
@ -381,30 +431,21 @@ async def index_bookstack_pages(
                    )

                # Process chunks - using the full page content
-                chunks = await create_document_chunks(full_content)
+                chunks = await create_document_chunks(item["full_content"])

-                # Create and store new document
-                logger.info(f"Creating new document for page {page_name}")
-                document = Document(
-                    search_space_id=search_space_id,
-                    title=f"BookStack - {page_name}",
-                    document_type=DocumentType.BOOKSTACK_CONNECTOR,
-                    document_metadata=doc_metadata,
-                    content=summary_content,
-                    content_hash=content_hash,
-                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
-                    updated_at=get_current_timestamp(),
-                    created_by_id=user_id,
-                    connector_id=connector_id,
-                )
+                # Update document to READY with actual content
+                document.title = item["page_name"]
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = doc_metadata
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()

-                session.add(document)
                documents_indexed += 1
-                logger.info(f"Successfully indexed new page {page_name}")

-                # Batch commit every 10 documents
+                # Batch commit every 10 documents (for ready status updates)
                if documents_indexed % 10 == 0:
                    logger.info(
                        f"Committing batch: {documents_indexed} BookStack pages processed so far"
@ -413,46 +454,76 @@ async def index_bookstack_pages(

            except Exception as e:
                logger.error(
-                    f"Error processing page {page.get('name', 'Unknown')}: {e!s}",
+                    f"Error processing page {item.get('page_name', 'Unknown')}: {e!s}",
                    exc_info=True,
                )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                skipped_pages.append(
-                    f"{page.get('name', 'Unknown')} (processing error)"
+                    f"{item.get('page_name', 'Unknown')} (processing error)"
                )
-                documents_skipped += 1
-                continue  # Skip this page and continue with others
+                documents_failed += 1
+                continue

-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if update_last_indexed:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(
            f"Final commit: Total {documents_indexed} BookStack pages processed"
        )
-        await session.commit()
-        logger.info("Successfully committed all BookStack document changes to database")
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all BookStack document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same page was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        # Log success
        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed BookStack indexing for connector {connector_id}",
            {
-                "pages_processed": total_processed,
+                "pages_processed": documents_indexed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                "skipped_pages_count": len(skipped_pages),
            },
        )

        logger.info(
-            f"BookStack indexing completed: {documents_indexed} new pages, {documents_skipped} skipped"
+            f"BookStack indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed"
        )
-        return (
-            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+        return documents_indexed, warning_message

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
@ -1,5 +1,9 @@
 """
 ClickUp connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import contextlib
@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.clickup_history import ClickUpHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -28,6 +32,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -141,10 +146,18 @@ async def index_clickup_tasks(

        documents_indexed = 0
        documents_skipped = 0
+        documents_failed = 0

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

+        # =======================================================================
+        # PHASE 1: Collect all tasks and create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        tasks_to_process = []  # List of dicts with document and task data
+        new_documents_created = False
+
        # Iterate workspaces and fetch tasks
        for workspace in workspaces:
            workspace_id = workspace.get("id")
@ -183,15 +196,6 @@ async def index_clickup_tasks(
            )

            for task in tasks:
-                # Check if it's time for a heartbeat update
-                if (
-                    on_heartbeat_callback
-                    and (time.time() - last_heartbeat_time)
-                    >= HEARTBEAT_INTERVAL_SECONDS
-                ):
-                    await on_heartbeat_callback(documents_indexed)
-                    last_heartbeat_time = time.time()
-
                try:
                    task_id = task.get("id")
                    task_name = task.get("name", "Untitled Task")
@ -255,73 +259,38 @@ async def index_clickup_tasks(
                    if existing_document:
                        # Document exists - check if content has changed
                        if existing_document.content_hash == content_hash:
+                            # Ensure status is ready (might have been stuck in processing/pending)
+                            if not DocumentStatus.is_state(
+                                existing_document.status, DocumentStatus.READY
+                            ):
+                                existing_document.status = DocumentStatus.ready()
                            logger.info(
                                f"Document for ClickUp task {task_name} unchanged. Skipping."
                            )
                            documents_skipped += 1
                            continue
                        else:
-                            # Content has changed - update the existing document
+                            # Queue existing document for update (will be set to processing in Phase 2)
                            logger.info(
-                                f"Content changed for ClickUp task {task_name}. Updating document."
+                                f"Content changed for ClickUp task {task_name}. Queuing for update."
                            )
-
-                            # Generate summary with metadata
-                            user_llm = await get_user_long_context_llm(
-                                session, user_id, search_space_id
-                            )
-
-                            if user_llm:
-                                document_metadata = {
+                            tasks_to_process.append(
+                                {
+                                    "document": existing_document,
+                                    "is_new": False,
+                                    "task_content": task_content,
+                                    "content_hash": content_hash,
                                    "task_id": task_id,
                                    "task_name": task_name,
                                    "task_status": task_status,
                                    "task_priority": task_priority,
-                                    "task_list": task_list_name,
-                                    "task_space": task_space_name,
-                                    "assignees": len(task_assignees),
-                                    "document_type": "ClickUp Task",
-                                    "connector_type": "ClickUp",
+                                    "task_list_name": task_list_name,
+                                    "task_space_name": task_space_name,
+                                    "task_assignees": task_assignees,
+                                    "task_due_date": task_due_date,
+                                    "task_created": task_created,
+                                    "task_updated": task_updated,
                                }
-                                (
-                                    summary_content,
-                                    summary_embedding,
-                                ) = await generate_document_summary(
-                                    task_content, user_llm, document_metadata
-                                )
-                            else:
-                                summary_content = task_content
-                                summary_embedding = (
-                                    config.embedding_model_instance.embed(task_content)
-                                )
-
-                            # Process chunks
-                            chunks = await create_document_chunks(task_content)
-
-                            # Update existing document
-                            existing_document.title = f"Task - {task_name}"
-                            existing_document.content = summary_content
-                            existing_document.content_hash = content_hash
-                            existing_document.embedding = summary_embedding
-                            existing_document.document_metadata = {
-                                "task_id": task_id,
-                                "task_name": task_name,
-                                "task_status": task_status,
-                                "task_priority": task_priority,
-                                "task_assignees": task_assignees,
-                                "task_due_date": task_due_date,
-                                "task_created": task_created,
-                                "task_updated": task_updated,
-                                "indexed_at": datetime.now().strftime(
-                                    "%Y-%m-%d %H:%M:%S"
-                                ),
-                            }
-                            existing_document.chunks = chunks
-                            existing_document.updated_at = get_current_timestamp()
-
-                            documents_indexed += 1
-                            logger.info(
-                                f"Successfully updated ClickUp task {task_name}"
                            )
                            continue

@ -341,42 +310,10 @@ async def index_clickup_tasks(
                        documents_skipped += 1
                        continue

-                    # Document doesn't exist - create new one
-                    # Generate summary with metadata
-                    user_llm = await get_user_long_context_llm(
-                        session, user_id, search_space_id
-                    )
-
-                    if user_llm:
-                        document_metadata = {
-                            "task_id": task_id,
-                            "task_name": task_name,
-                            "task_status": task_status,
-                            "task_priority": task_priority,
-                            "task_list": task_list_name,
-                            "task_space": task_space_name,
-                            "assignees": len(task_assignees),
-                            "document_type": "ClickUp Task",
-                            "connector_type": "ClickUp",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            task_content, user_llm, document_metadata
-                        )
-                    else:
-                        # Fallback to simple summary if no LLM configured
-                        summary_content = task_content
-                        summary_embedding = config.embedding_model_instance.embed(
-                            task_content
-                        )
-
-                    chunks = await create_document_chunks(task_content)
-
+                    # Create new document with PENDING status (visible in UI immediately)
                    document = Document(
                        search_space_id=search_space_id,
-                        title=f"Task - {task_name}",
+                        title=task_name,
                        document_type=DocumentType.CLICKUP_CONNECTOR,
                        document_metadata={
                            "task_id": task_id,
@ -387,44 +324,180 @@ async def index_clickup_tasks(
                            "task_due_date": task_due_date,
                            "task_created": task_created,
                            "task_updated": task_updated,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                            "connector_id": connector_id,
                        },
-                        content=summary_content,
-                        content_hash=content_hash,
+                        content="Pending...",  # Placeholder until processed
+                        content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                        unique_identifier_hash=unique_identifier_hash,
-                        embedding=summary_embedding,
-                        chunks=chunks,
+                        embedding=None,
+                        chunks=[],  # Empty at creation - safe for async
+                        status=DocumentStatus.pending(),  # Pending until processing starts
                        updated_at=get_current_timestamp(),
                        created_by_id=user_id,
                        connector_id=connector_id,
                    )
-
                    session.add(document)
-                    documents_indexed += 1
-                    logger.info(f"Successfully indexed new task {task_name}")
+                    new_documents_created = True

-                    # Batch commit every 10 documents
-                    if documents_indexed % 10 == 0:
-                        logger.info(
-                            f"Committing batch: {documents_indexed} ClickUp tasks processed so far"
-                        )
-                        await session.commit()
+                    tasks_to_process.append(
+                        {
+                            "document": document,
+                            "is_new": True,
+                            "task_content": task_content,
+                            "content_hash": content_hash,
+                            "task_id": task_id,
+                            "task_name": task_name,
+                            "task_status": task_status,
+                            "task_priority": task_priority,
+                            "task_list_name": task_list_name,
+                            "task_space_name": task_space_name,
+                            "task_assignees": task_assignees,
+                            "task_due_date": task_due_date,
+                            "task_created": task_created,
+                            "task_updated": task_updated,
+                        }
+                    )

                except Exception as e:
                    logger.error(
-                        f"Error processing task {task.get('name', 'Unknown')}: {e!s}",
+                        f"Error in Phase 1 for task {task.get('name', 'Unknown')}: {e!s}",
                        exc_info=True,
                    )
-                    documents_skipped += 1
+                    documents_failed += 1
+                    continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(tasks_to_process)} documents")
+
+        for item in tasks_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "task_id": item["task_id"],
+                        "task_name": item["task_name"],
+                        "task_status": item["task_status"],
+                        "task_priority": item["task_priority"],
+                        "task_list": item["task_list_name"],
+                        "task_space": item["task_space_name"],
+                        "assignees": len(item["task_assignees"]),
+                        "document_type": "ClickUp Task",
+                        "connector_type": "ClickUp",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["task_content"], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    summary_content = item["task_content"]
+                    summary_embedding = config.embedding_model_instance.embed(
+                        item["task_content"]
+                    )
+
+                chunks = await create_document_chunks(item["task_content"])
+
+                # Update document to READY with actual content
+                document.title = item["task_name"]
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "task_id": item["task_id"],
+                    "task_name": item["task_name"],
+                    "task_status": item["task_status"],
+                    "task_priority": item["task_priority"],
+                    "task_assignees": item["task_assignees"],
+                    "task_due_date": item["task_due_date"],
+                    "task_created": item["task_created"],
+                    "task_updated": item["task_updated"],
+                    "connector_id": connector_id,
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} ClickUp tasks processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                logger.error(
+                    f"Error processing task {item.get('task_name', 'Unknown')}: {e!s}",
+                    exc_info=True,
+                )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
+                continue

        total_processed = documents_indexed

-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(f"Final commit: Total {documents_indexed} ClickUp tasks processed")
-        await session.commit()
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all ClickUp document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same task was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise

        await task_logger.log_task_success(
            log_entry,
@ -433,11 +506,12 @@ async def index_clickup_tasks(
                "pages_processed": total_processed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
            },
        )

        logger.info(
-            f"clickup indexing completed: {documents_indexed} new tasks, {documents_skipped} skipped"
+            f"clickup indexing completed: {documents_indexed} ready, {documents_skipped} skipped, {documents_failed} failed"
        )

        # Close client connection
--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@ -1,5 +1,9 @@
 """
 Confluence connector indexer.
+
+Provides real-time document status updates during indexing using a two-phase approach:
+- Phase 1: Create all documents with PENDING status (visible in UI immediately)
+- Phase 2: Process each document one by one (PENDING → PROCESSING → READY/FAILED)
 """

 import contextlib
@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.confluence_history import ConfluenceHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -29,6 +33,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -180,22 +185,22 @@ async def index_confluence_pages(
                    await confluence_client.close()
            return 0, f"Error fetching Confluence pages: {e!s}"

-        # Process and index each page
+        # =======================================================================
+        # PHASE 1: Analyze all pages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
        documents_indexed = 0
-        skipped_pages = []
        documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

+        pages_to_process = []  # List of dicts with document and page data
+        new_documents_created = False
+
        for page in pages:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
            try:
                page_id = page.get("id")
                page_title = page.get("title", "")
@ -205,7 +210,6 @@ async def index_confluence_pages(
                    logger.warning(
                        f"Skipping page with missing ID or title: {page_id or 'Unknown'}"
                    )
-                    skipped_pages.append(f"{page_title or 'Unknown'} (missing data)")
                    documents_skipped += 1
                    continue

@ -236,7 +240,6 @@ async def index_confluence_pages(

                if not full_content.strip():
                    logger.warning(f"Skipping page with no content: {page_title}")
-                    skipped_pages.append(f"{page_title} (no content)")
                    documents_skipped += 1
                    continue

@ -258,74 +261,29 @@ async def index_confluence_pages(
                if existing_document:
                    # Document exists - check if content has changed
                    if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Confluence page {page_title} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
                        documents_skipped += 1
                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Confluence page {page_title}. Updating document."
-                        )

-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "page_title": page_title,
-                                "page_id": page_id,
-                                "space_id": space_id,
-                                "comment_count": comment_count,
-                                "document_type": "Confluence Page",
-                                "connector_type": "Confluence",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                full_content, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = f"Confluence Page: {page_title}\n\nSpace ID: {space_id}\n\n"
-                            if page_content:
-                                content_preview = page_content[:1000]
-                                if len(page_content) > 1000:
-                                    content_preview += "..."
-                                summary_content += (
-                                    f"Content Preview: {content_preview}\n\n"
-                                )
-                            summary_content += f"Comments: {comment_count}"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(full_content)
-
-                        # Update existing document
-                        existing_document.title = f"Confluence - {page_title}"
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    pages_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "full_content": full_content,
+                            "page_content": page_content,
+                            "content_hash": content_hash,
                            "page_id": page_id,
                            "page_title": page_title,
                            "space_id": space_id,
                            "comment_count": comment_count,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(
-                            f"Successfully updated Confluence page {page_title}"
-                        )
-                        continue
+                    )
+                    continue

                # Document doesn't exist by unique_identifier_hash
                # Check if a document with the same content_hash exists (from another connector)
@ -340,21 +298,92 @@ async def index_confluence_pages(
                        f"(existing document ID: {duplicate_by_content.id}, "
                        f"type: {duplicate_by_content.document_type}). Skipping."
                    )
+                    duplicate_content_count += 1
                    documents_skipped += 1
                    continue

-                # Document doesn't exist - create new one
-                # Generate summary with metadata
+                # Create new document with PENDING status (visible in UI immediately)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=page_title,
+                    document_type=DocumentType.CONFLUENCE_CONNECTOR,
+                    document_metadata={
+                        "page_id": page_id,
+                        "page_title": page_title,
+                        "space_id": space_id,
+                        "comment_count": comment_count,
+                        "connector_id": connector_id,
+                    },
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
+                    updated_at=get_current_timestamp(),
+                    created_by_id=user_id,
+                    connector_id=connector_id,
+                )
+                session.add(document)
+                new_documents_created = True
+
+                pages_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "full_content": full_content,
+                        "page_content": page_content,
+                        "content_hash": content_hash,
+                        "page_id": page_id,
+                        "page_title": page_title,
+                        "space_id": space_id,
+                        "comment_count": comment_count,
+                    }
+                )
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(pages_to_process)} documents")
+
+        for item in pages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
                user_llm = await get_user_long_context_llm(
                    session, user_id, search_space_id
                )

                if user_llm:
                    document_metadata = {
-                        "page_title": page_title,
-                        "page_id": page_id,
-                        "space_id": space_id,
-                        "comment_count": comment_count,
+                        "page_title": item["page_title"],
+                        "page_id": item["page_id"],
+                        "space_id": item["space_id"],
+                        "comment_count": item["comment_count"],
                        "document_type": "Confluence Page",
                        "connector_type": "Confluence",
                    }
@ -362,55 +391,45 @@ async def index_confluence_pages(
                        summary_content,
                        summary_embedding,
                    ) = await generate_document_summary(
-                        full_content, user_llm, document_metadata
+                        item["full_content"], user_llm, document_metadata
                    )
                else:
                    # Fallback to simple summary if no LLM configured
-                    summary_content = (
-                        f"Confluence Page: {page_title}\n\nSpace ID: {space_id}\n\n"
-                    )
-                    if page_content:
-                        # Take first 500 characters of content for summary
-                        content_preview = page_content[:1000]
-                        if len(page_content) > 1000:
+                    summary_content = f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n"
+                    if item["page_content"]:
+                        # Take first 1000 characters of content for summary
+                        content_preview = item["page_content"][:1000]
+                        if len(item["page_content"]) > 1000:
                            content_preview += "..."
                        summary_content += f"Content Preview: {content_preview}\n\n"
-                    summary_content += f"Comments: {comment_count}"
+                    summary_content += f"Comments: {item['comment_count']}"
                    summary_embedding = config.embedding_model_instance.embed(
                        summary_content
                    )

                # Process chunks - using the full page content with comments
-                chunks = await create_document_chunks(full_content)
+                chunks = await create_document_chunks(item["full_content"])

-                # Create and store new document
-                logger.info(f"Creating new document for page {page_title}")
-                document = Document(
-                    search_space_id=search_space_id,
-                    title=f"Confluence - {page_title}",
-                    document_type=DocumentType.CONFLUENCE_CONNECTOR,
-                    document_metadata={
-                        "page_id": page_id,
-                        "page_title": page_title,
-                        "space_id": space_id,
-                        "comment_count": comment_count,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                    },
-                    content=summary_content,
-                    content_hash=content_hash,
-                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
-                    updated_at=get_current_timestamp(),
-                    created_by_id=user_id,
-                    connector_id=connector_id,
-                )
+                # Update document to READY with actual content
+                document.title = item["page_title"]
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "page_id": item["page_id"],
+                    "page_title": item["page_title"],
+                    "space_id": item["space_id"],
+                    "comment_count": item["comment_count"],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()

-                session.add(document)
                documents_indexed += 1
-                logger.info(f"Successfully indexed new page {page_title}")

-                # Batch commit every 10 documents
+                # Batch commit every 10 documents (for ready status updates)
                if documents_indexed % 10 == 0:
                    logger.info(
                        f"Committing batch: {documents_indexed} Confluence pages processed so far"
@ -419,53 +438,80 @@ async def index_confluence_pages(

            except Exception as e:
                logger.error(
-                    f"Error processing page {page.get('title', 'Unknown')}: {e!s}",
+                    f"Error processing page {item.get('page_title', 'Unknown')}: {e!s}",
                    exc_info=True,
                )
-                skipped_pages.append(
-                    f"{page.get('title', 'Unknown')} (processing error)"
-                )
-                documents_skipped += 1
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
                continue  # Skip this page and continue with others

-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if update_last_indexed:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)

-        # Final commit for any remaining documents not yet committed in batches
+        # Final commit to ensure all documents are persisted (safety net)
        logger.info(
            f"Final commit: Total {documents_indexed} Confluence pages processed"
        )
-        await session.commit()
-        logger.info(
-            "Successfully committed all Confluence document changes to database"
-        )
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Confluence document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same page was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        # Log success
        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Confluence indexing for connector {connector_id}",
            {
-                "pages_processed": total_processed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
-                "skipped_pages_count": len(skipped_pages),
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
            },
        )

        logger.info(
-            f"Confluence indexing completed: {documents_indexed} new pages, {documents_skipped} skipped"
+            f"Confluence indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
        )

        # Close the client connection
        if confluence_client:
            await confluence_client.close()

-        return (
-            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+        return documents_indexed, warning_message

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
@ -1,5 +1,9 @@
 """
 Discord connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import asyncio
@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.discord_connector import DiscordConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
@ -27,6 +31,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -48,7 +53,11 @@ async def index_discord_messages(
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, str | None]:
    """
-    Index Discord messages from all accessible channels.
+    Index Discord messages from the configured guild's channels.
+
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+    - Phase 2: Process each document: pending → processing → ready/failed

    Args:
        session: Database session
@ -113,6 +122,37 @@ async def index_discord_messages(

        logger.info(f"Starting Discord indexing for connector {connector_id}")

+        # =======================================================================
+        # GUILD FILTERING: Only index the specific guild configured for this connector
+        # =======================================================================
+        # Extract guild_id from connector config (set during OAuth flow)
+        configured_guild_id = connector.config.get("guild_id")
+        configured_guild_name = connector.config.get("guild_name")
+
+        # Legacy connector check - if no guild_id, we need to warn and handle gracefully
+        is_legacy_connector = configured_guild_id is None
+
+        if is_legacy_connector:
+            logger.warning(
+                f"Discord connector {connector_id} has no guild_id configured. "
+                "This is a legacy connector. Please reconnect the Discord server to fix this. "
+                "For now, indexing will be skipped to prevent indexing unwanted servers."
+            )
+            await task_logger.log_task_failure(
+                log_entry,
+                f"Legacy Discord connector {connector_id} missing guild_id",
+                "No guild_id configured. Please reconnect this Discord server.",
+                {"error_type": "MissingGuildId", "is_legacy": True},
+            )
+            return (
+                0,
+                "This Discord connector needs to be reconnected. Please disconnect and reconnect your Discord server to enable indexing.",
+            )
+
+        logger.info(
+            f"Configured to index guild: {configured_guild_name} ({configured_guild_id})"
+        )
+
        # Initialize Discord client with OAuth credentials support
        await task_logger.log_task_progress(
            log_entry,
@ -255,77 +295,66 @@ async def index_discord_messages(
        try:
            await task_logger.log_task_progress(
                log_entry,
-                f"Starting Discord bot and fetching guilds for connector {connector_id}",
-                {"stage": "fetch_guilds"},
+                f"Starting Discord bot for connector {connector_id}",
+                {"stage": "bot_initialization"},
            )

-            logger.info("Starting Discord bot to fetch guilds")
+            logger.info("Starting Discord bot")
            discord_client._bot_task = asyncio.create_task(discord_client.start_bot())
            await discord_client._wait_until_ready()

-            logger.info("Fetching Discord guilds")
-            guilds = await discord_client.get_guilds()
-            logger.info(f"Found {len(guilds)} guilds")
+            # We only process the configured guild, not all guilds
+            logger.info(
+                f"Processing configured guild only: {configured_guild_name} ({configured_guild_id})"
+            )
+
        except Exception as e:
            await task_logger.log_task_failure(
                log_entry,
-                f"Failed to get Discord guilds for connector {connector_id}",
+                f"Failed to start Discord bot for connector {connector_id}",
                str(e),
-                {"error_type": "GuildFetchError"},
+                {"error_type": "BotStartError"},
            )
-            logger.error(f"Failed to get Discord guilds: {e!s}", exc_info=True)
+            logger.error(f"Failed to start Discord bot: {e!s}", exc_info=True)
            await discord_client.close_bot()
-            return 0, f"Failed to get Discord guilds: {e!s}"
-
-        if not guilds:
-            await task_logger.log_task_success(
-                log_entry,
-                f"No Discord guilds found for connector {connector_id}",
-                {"guilds_found": 0},
-            )
-            logger.info("No Discord guilds found to index")
-            await discord_client.close_bot()
-            return 0, "No Discord guilds found"
+            return 0, f"Failed to start Discord bot: {e!s}"

        # Track results
        documents_indexed = 0
        documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0
        skipped_channels: list[str] = []

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

-        # Process each guild and channel
+        # Use the configured guild info
+        guild_id = configured_guild_id
+        guild_name = configured_guild_name or "Unknown Guild"
+
        await task_logger.log_task_progress(
            log_entry,
-            f"Starting to process {len(guilds)} Discord guilds",
-            {"stage": "process_guilds", "total_guilds": len(guilds)},
+            f"Processing Discord guild: {guild_name}",
+            {"stage": "process_guild", "guild_id": guild_id, "guild_name": guild_name},
        )

+        # =======================================================================
+        # PHASE 1: Collect all messages and create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        messages_to_process = []  # List of dicts with document and message data
+        new_documents_created = False
+
        try:
-            for guild in guilds:
-                # Check if it's time for a heartbeat update
-                if (
-                    on_heartbeat_callback
-                    and (time.time() - last_heartbeat_time)
-                    >= HEARTBEAT_INTERVAL_SECONDS
-                ):
-                    await on_heartbeat_callback(documents_indexed)
-                    last_heartbeat_time = time.time()
-                guild_id = guild["id"]
-                guild_name = guild["name"]
-                logger.info(f"Processing guild: {guild_name} ({guild_id})")
-
-                try:
-                    channels = await discord_client.get_text_channels(guild_id)
-                    if not channels:
-                        logger.info(
-                            f"No channels found in guild {guild_name}. Skipping."
-                        )
-                        skipped_channels.append(f"{guild_name} (no channels)")
-                        documents_skipped += 1
-                        continue
+            logger.info(f"Processing guild: {guild_name} ({guild_id})")

+            try:
+                channels = await discord_client.get_text_channels(guild_id)
+                if not channels:
+                    logger.info(f"No channels found in guild {guild_name}. Skipping.")
+                    skipped_channels.append(f"{guild_name} (no channels)")
+                else:
                    for channel in channels:
                        channel_id = channel["id"]
                        channel_name = channel["name"]
@ -343,14 +372,12 @@ async def index_discord_messages(
                            skipped_channels.append(
                                f"{guild_name}#{channel_name} (fetch error)"
                            )
-                            documents_skipped += 1
                            continue

                        if not messages:
                            logger.info(
                                f"No messages found in channel {channel_name} for the specified date range."
                            )
-                            documents_skipped += 1
                            continue

                        # Filter/format messages
@ -365,7 +392,6 @@ async def index_discord_messages(
                            logger.info(
                                f"No valid messages found in channel {channel_name} after filtering."
                            )
-                            documents_skipped += 1
                            continue

                        # Process each message as an individual document (like Slack)
@ -427,32 +453,23 @@ async def index_discord_messages(
                            if existing_document:
                                # Document exists - check if content has changed
                                if existing_document.content_hash == content_hash:
-                                    logger.info(
-                                        f"Document for Discord message {msg_id} in {guild_name}#{channel_name} unchanged. Skipping."
-                                    )
+                                    # Ensure status is ready (might have been stuck in processing/pending)
+                                    if not DocumentStatus.is_state(
+                                        existing_document.status, DocumentStatus.READY
+                                    ):
+                                        existing_document.status = (
+                                            DocumentStatus.ready()
+                                        )
                                    documents_skipped += 1
                                    continue
-                                else:
-                                    # Content has changed - update the existing document
-                                    logger.info(
-                                        f"Content changed for Discord message {msg_id} in {guild_name}#{channel_name}. Updating document."
-                                    )

-                                    # Update chunks and embedding
-                                    chunks = await create_document_chunks(
-                                        combined_document_string
-                                    )
-                                    doc_embedding = (
-                                        config.embedding_model_instance.embed(
-                                            combined_document_string
-                                        )
-                                    )
-
-                                    # Update existing document
-                                    existing_document.content = combined_document_string
-                                    existing_document.content_hash = content_hash
-                                    existing_document.embedding = doc_embedding
-                                    existing_document.document_metadata = {
+                                # Queue existing document for update (will be set to processing in Phase 2)
+                                messages_to_process.append(
+                                    {
+                                        "document": existing_document,
+                                        "is_new": False,
+                                        "combined_document_string": combined_document_string,
+                                        "content_hash": content_hash,
                                        "guild_name": guild_name,
                                        "guild_id": guild_id,
                                        "channel_name": channel_name,
@ -460,22 +477,9 @@ async def index_discord_messages(
                                        "message_id": msg_id,
                                        "message_timestamp": msg_timestamp,
                                        "message_user_name": msg_user_name,
-                                        "indexed_at": datetime.now(UTC).strftime(
-                                            "%Y-%m-%d %H:%M:%S"
-                                        ),
                                    }
-
-                                    # Delete old chunks and add new ones
-                                    existing_document.chunks = chunks
-                                    existing_document.updated_at = (
-                                        get_current_timestamp()
-                                    )
-
-                                    documents_indexed += 1
-                                    logger.info(
-                                        f"Successfully updated Discord message {msg_id}"
-                                    )
-                                    continue
+                                )
+                                continue

                            # Document doesn't exist by unique_identifier_hash
                            # Check if a document with the same content_hash exists (from another connector)
@ -492,22 +496,14 @@ async def index_discord_messages(
                                    f"(existing document ID: {duplicate_by_content.id}, "
                                    f"type: {duplicate_by_content.document_type}). Skipping."
                                )
+                                duplicate_content_count += 1
                                documents_skipped += 1
                                continue

-                            # Document doesn't exist - create new one
-                            # Process chunks
-                            chunks = await create_document_chunks(
-                                combined_document_string
-                            )
-                            doc_embedding = config.embedding_model_instance.embed(
-                                combined_document_string
-                            )
-
-                            # Create and store new document
+                            # Create new document with PENDING status (visible in UI immediately)
                            document = Document(
                                search_space_id=search_space_id,
-                                title=f"Discord - {guild_name}#{channel_name}",
+                                title=f"{guild_name}#{channel_name}",
                                document_type=DocumentType.DISCORD_CONNECTOR,
                                document_metadata={
                                    "guild_name": guild_name,
@ -515,87 +511,177 @@ async def index_discord_messages(
                                    "channel_name": channel_name,
                                    "channel_id": channel_id,
                                    "message_id": msg_id,
-                                    "message_timestamp": msg_timestamp,
-                                    "message_user_name": msg_user_name,
-                                    "indexed_at": datetime.now(UTC).strftime(
-                                        "%Y-%m-%d %H:%M:%S"
-                                    ),
+                                    "connector_id": connector_id,
                                },
-                                content=combined_document_string,
-                                embedding=doc_embedding,
-                                chunks=chunks,
-                                content_hash=content_hash,
+                                content="Pending...",  # Placeholder until processed
+                                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                                unique_identifier_hash=unique_identifier_hash,
+                                embedding=None,
+                                chunks=[],  # Empty at creation - safe for async
+                                status=DocumentStatus.pending(),  # Pending until processing starts
                                updated_at=get_current_timestamp(),
                                created_by_id=user_id,
                                connector_id=connector_id,
                            )
-
                            session.add(document)
-                            documents_indexed += 1
+                            new_documents_created = True

-                            # Batch commit every 10 documents
-                            if documents_indexed % 10 == 0:
-                                logger.info(
-                                    f"Committing batch: {documents_indexed} Discord messages processed so far"
-                                )
-                                await session.commit()
+                            messages_to_process.append(
+                                {
+                                    "document": document,
+                                    "is_new": True,
+                                    "combined_document_string": combined_document_string,
+                                    "content_hash": content_hash,
+                                    "guild_name": guild_name,
+                                    "guild_id": guild_id,
+                                    "channel_name": channel_name,
+                                    "channel_id": channel_id,
+                                    "message_id": msg_id,
+                                    "message_timestamp": msg_timestamp,
+                                    "message_user_name": msg_user_name,
+                                }
+                            )

-                        logger.info(
-                            f"Successfully indexed channel {guild_name}#{channel_name} with {len(formatted_messages)} messages"
-                        )
+            except Exception as e:
+                logger.error(
+                    f"Error processing guild {guild_name}: {e!s}", exc_info=True
+                )
+                skipped_channels.append(f"{guild_name} (processing error)")

-                except Exception as e:
-                    logger.error(
-                        f"Error processing guild {guild_name}: {e!s}", exc_info=True
-                    )
-                    skipped_channels.append(f"{guild_name} (processing error)")
-                    documents_skipped += 1
-                    continue
        finally:
            await discord_client.close_bot()

-        # Update last_indexed_at only if we indexed at least one
-        if documents_indexed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+
+        for item in messages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (embeddings, chunks)
+                chunks = await create_document_chunks(item["combined_document_string"])
+                doc_embedding = config.embedding_model_instance.embed(
+                    item["combined_document_string"]
+                )
+
+                # Update document to READY with actual content
+                document.title = f"{item['guild_name']}#{item['channel_name']}"
+                document.content = item["combined_document_string"]
+                document.content_hash = item["content_hash"]
+                document.embedding = doc_embedding
+                document.document_metadata = {
+                    "guild_name": item["guild_name"],
+                    "guild_id": item["guild_id"],
+                    "channel_name": item["channel_name"],
+                    "channel_id": item["channel_id"],
+                    "message_id": item["message_id"],
+                    "message_timestamp": item["message_timestamp"],
+                    "message_user_name": item["message_user_name"],
+                    "indexed_at": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} Discord messages processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                logger.error(f"Error processing Discord message: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(
            f"Final commit: Total {documents_indexed} Discord messages processed"
        )
-        await session.commit()
-
-        # Prepare result message
-        result_message = None
-        if skipped_channels:
-            result_message = (
-                f"Processed {documents_indexed} messages. Skipped {len(skipped_channels)} channels: "
-                + ", ".join(skipped_channels)
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Discord document changes to database"
            )
-        else:
-            result_message = f"Processed {documents_indexed} messages."
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        if skipped_channels:
+            warning_parts.append(f"{len(skipped_channels)} channels skipped")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        # Log success
        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Discord indexing for connector {connector_id}",
            {
-                "messages_processed": documents_indexed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
                "skipped_channels_count": len(skipped_channels),
-                "guilds_processed": len(guilds),
-                "result_message": result_message,
+                "guild_id": guild_id,
+                "guild_name": guild_name,
            },
        )

        logger.info(
-            f"Discord indexing completed: {documents_indexed} new messages, {documents_skipped} skipped"
+            f"Discord indexing completed for guild {guild_name}: {documents_indexed} ready, {documents_skipped} skipped, "
+            f"{documents_failed} failed ({duplicate_content_count} duplicate content)"
        )
-        return (
-            documents_indexed,
-            None,
-        )  # Return None on success (result_message is for logging only)
+        return documents_indexed, warning_message

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
@ -1,5 +1,9 @@
 """
 Elasticsearch indexer for SurfSense
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Collect all documents and create pending documents (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import json
@ -13,7 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select

 from app.connectors.elasticsearch_connector import ElasticsearchConnector
-from app.db import Document, DocumentType, SearchSourceConnector
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnector
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
@ -25,6 +29,7 @@ from .base import (
    check_document_by_unique_identifier,
    check_duplicate_document_by_hash,
    get_current_timestamp,
+    safe_set_chunks,
 )

 # Type hint for heartbeat callback
@ -164,6 +169,8 @@ async def index_elasticsearch_documents(
        )

        documents_processed = 0
+        documents_skipped = 0
+        documents_failed = 0

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()
@ -178,23 +185,22 @@ async def index_elasticsearch_documents(
                    "max_documents": max_documents,
                },
            )
-            # Use scroll search for large result sets
+
+            # =======================================================================
+            # PHASE 1: Collect all documents from Elasticsearch and create pending documents
+            # This makes ALL documents visible in the UI immediately with pending status
+            # =======================================================================
+            docs_to_process = []  # List of dicts with document and ES data
+            new_documents_created = False
+            hits_collected = 0
+
            async for hit in es_connector.scroll_search(
                index=index_name,
                query=query,
                size=min(max_documents, 100),  # Scroll in batches
                fields=config.get("ELASTICSEARCH_FIELDS"),
            ):
-                # Check if it's time for a heartbeat update
-                if (
-                    on_heartbeat_callback
-                    and (time.time() - last_heartbeat_time)
-                    >= HEARTBEAT_INTERVAL_SECONDS
-                ):
-                    await on_heartbeat_callback(documents_processed)
-                    last_heartbeat_time = time.time()
-
-                if documents_processed >= max_documents:
+                if hits_collected >= max_documents:
                    break

                try:
@ -220,26 +226,12 @@ async def index_elasticsearch_documents(

                    if not content.strip():
                        logger.warning(f"Skipping document {doc_id} - no content found")
+                        documents_skipped += 1
                        continue

                    # Create content hash
                    content_hash = generate_content_hash(content, search_space_id)

-                    # Build metadata
-                    metadata = {
-                        "elasticsearch_id": doc_id,
-                        "elasticsearch_index": hit.get("_index", index_name),
-                        "elasticsearch_score": hit.get("_score"),
-                        "indexed_at": datetime.now().isoformat(),
-                        "source": "ELASTICSEARCH_CONNECTOR",
-                    }
-
-                    # Add any additional metadata fields specified in config
-                    if "ELASTICSEARCH_METADATA_FIELDS" in config:
-                        for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
-                            if field in source:
-                                metadata[f"es_{field}"] = source[field]
-
                    # Build source-unique identifier and hash (prefer source id dedupe)
                    source_identifier = f"{hit.get('_index', index_name)}:{doc_id}"
                    unique_identifier_hash = generate_unique_identifier_hash(
@ -258,98 +250,223 @@ async def index_elasticsearch_documents(
                        )

                    if existing_doc:
-                        # If content is unchanged, skip. Otherwise update the existing document.
+                        # If content is unchanged, skip. Otherwise queue for update.
                        if existing_doc.content_hash == content_hash:
+                            # Ensure status is ready (might have been stuck in processing/pending)
+                            if not DocumentStatus.is_state(
+                                existing_doc.status, DocumentStatus.READY
+                            ):
+                                existing_doc.status = DocumentStatus.ready()
                            logger.info(
                                f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})"
                            )
-                            continue
-                        else:
-                            logger.info(
-                                f"Updating existing document {existing_doc.id} for ES doc {doc_id}"
-                            )
-                            existing_doc.title = title
-                            existing_doc.content = content
-                            existing_doc.content_hash = content_hash
-                            existing_doc.document_metadata = metadata
-                            existing_doc.unique_identifier_hash = unique_identifier_hash
-                            chunks = await create_document_chunks(content)
-                            existing_doc.chunks = chunks
-                            existing_doc.updated_at = get_current_timestamp()
-                            await session.flush()
-                            documents_processed += 1
-                            if documents_processed % 10 == 0:
-                                await session.commit()
+                            documents_skipped += 1
                            continue

-                    # Create document
+                        # Queue existing document for update (will be set to processing in Phase 2)
+                        docs_to_process.append(
+                            {
+                                "document": existing_doc,
+                                "is_new": False,
+                                "doc_id": doc_id,
+                                "title": title,
+                                "content": content,
+                                "content_hash": content_hash,
+                                "unique_identifier_hash": unique_identifier_hash,
+                                "hit": hit,
+                                "source": source,
+                            }
+                        )
+                        hits_collected += 1
+                        continue
+
+                    # Build metadata for new document
+                    metadata = {
+                        "elasticsearch_id": doc_id,
+                        "elasticsearch_index": hit.get("_index", index_name),
+                        "elasticsearch_score": hit.get("_score"),
+                        "source": "ELASTICSEARCH_CONNECTOR",
+                        "connector_id": connector_id,
+                    }
+
+                    # Add any additional metadata fields specified in config
+                    if "ELASTICSEARCH_METADATA_FIELDS" in config:
+                        for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
+                            if field in source:
+                                metadata[f"es_{field}"] = source[field]
+
+                    # Create new document with PENDING status (visible in UI immediately)
                    document = Document(
                        title=title,
-                        content=content,
-                        content_hash=content_hash,
+                        content="Pending...",  # Placeholder until processed
+                        content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                        unique_identifier_hash=unique_identifier_hash,
                        document_type=DocumentType.ELASTICSEARCH_CONNECTOR,
                        document_metadata=metadata,
                        search_space_id=search_space_id,
+                        embedding=None,
+                        chunks=[],  # Empty at creation - safe for async
+                        status=DocumentStatus.pending(),  # Pending until processing starts
                        updated_at=get_current_timestamp(),
                        created_by_id=user_id,
                        connector_id=connector_id,
                    )
-
-                    # Create chunks and attach to document (persist via relationship)
-                    chunks = await create_document_chunks(content)
-                    document.chunks = chunks
                    session.add(document)
-                    await session.flush()
+                    new_documents_created = True
+
+                    docs_to_process.append(
+                        {
+                            "document": document,
+                            "is_new": True,
+                            "doc_id": doc_id,
+                            "title": title,
+                            "content": content,
+                            "content_hash": content_hash,
+                            "unique_identifier_hash": unique_identifier_hash,
+                            "hit": hit,
+                            "source": source,
+                        }
+                    )
+                    hits_collected += 1
+
+                except Exception as e:
+                    logger.error(f"Error in Phase 1 for ES doc: {e!s}", exc_info=True)
+                    documents_failed += 1
+                    continue
+
+            # Commit all pending documents - they all appear in UI now
+            if new_documents_created:
+                logger.info(
+                    f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents"
+                )
+                await session.commit()
+
+            # =======================================================================
+            # PHASE 2: Process each document one by one
+            # Each document transitions: pending → processing → ready/failed
+            # =======================================================================
+            logger.info(f"Phase 2: Processing {len(docs_to_process)} documents")
+
+            for item in docs_to_process:
+                # Send heartbeat periodically
+                if on_heartbeat_callback:
+                    current_time = time.time()
+                    if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                        await on_heartbeat_callback(documents_processed)
+                        last_heartbeat_time = current_time
+
+                document = item["document"]
+                try:
+                    # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                    document.status = DocumentStatus.processing()
+                    await session.commit()
+
+                    # Build metadata
+                    metadata = {
+                        "elasticsearch_id": item["doc_id"],
+                        "elasticsearch_index": item["hit"].get("_index", index_name),
+                        "elasticsearch_score": item["hit"].get("_score"),
+                        "indexed_at": datetime.now().isoformat(),
+                        "source": "ELASTICSEARCH_CONNECTOR",
+                        "connector_id": connector_id,
+                    }
+
+                    # Add any additional metadata fields specified in config
+                    if "ELASTICSEARCH_METADATA_FIELDS" in config:
+                        for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
+                            if field in item["source"]:
+                                metadata[f"es_{field}"] = item["source"][field]
+
+                    # Create chunks
+                    chunks = await create_document_chunks(item["content"])
+
+                    # Update document to READY with actual content
+                    document.title = item["title"]
+                    document.content = item["content"]
+                    document.content_hash = item["content_hash"]
+                    document.unique_identifier_hash = item["unique_identifier_hash"]
+                    document.document_metadata = metadata
+                    safe_set_chunks(document, chunks)
+                    document.updated_at = get_current_timestamp()
+                    document.status = DocumentStatus.ready()

                    documents_processed += 1

+                    # Batch commit every 10 documents (for ready status updates)
                    if documents_processed % 10 == 0:
                        logger.info(
-                            f"Processed {documents_processed} Elasticsearch documents"
+                            f"Committing batch: {documents_processed} Elasticsearch documents processed so far"
                        )
                        await session.commit()

                except Exception as e:
-                    msg = f"Error processing Elasticsearch document {hit.get('_id', 'unknown')}: {e}"
+                    msg = f"Error processing Elasticsearch document {item.get('doc_id', 'unknown')}: {e}"
                    logger.error(msg)
-                    await task_logger.log_task_failure(
-                        log_entry,
-                        "Document processing error",
-                        msg,
-                        {
-                            "document_id": hit.get("_id", "unknown"),
-                            "error_type": type(e).__name__,
-                        },
-                    )
+                    # Mark document as failed with reason (visible in UI)
+                    try:
+                        document.status = DocumentStatus.failed(str(e))
+                        document.updated_at = get_current_timestamp()
+                    except Exception as status_error:
+                        logger.error(
+                            f"Failed to update document status to failed: {status_error}"
+                        )
+                    documents_failed += 1
                    continue

-            # Final commit
-            await session.commit()
+            # CRITICAL: When update_last_indexed is set, update the timestamp even if 0 documents were
+            # indexed so Electric SQL syncs and the UI shows "Last indexed" instead of "Never indexed"
+            if update_last_indexed:
+                connector.last_indexed_at = (
+                    datetime.now(UTC).isoformat().replace("+00:00", "Z")
+                )
+
+            # Final commit for any remaining documents not yet committed in batches
+            logger.info(
+                f"Final commit: Total {documents_processed} Elasticsearch documents processed"
+            )
+            try:
+                await session.commit()
+                logger.info(
+                    "Successfully committed all Elasticsearch document changes to database"
+                )
+            except Exception as e:
+                # Tolerate unique-constraint violations gracefully (race conditions, etc.); re-raise anything else
+                if (
+                    "duplicate key value violates unique constraint" in str(e).lower()
+                    or "uniqueviolationerror" in str(e).lower()
+                ):
+                    logger.warning(
+                        f"Duplicate content_hash detected during final commit. "
+                        f"This may occur if the same document was indexed by multiple connectors. "
+                        f"Rolling back and continuing. Error: {e!s}"
+                    )
+                    await session.rollback()
+                    # Don't fail the entire task - some documents may have been successfully indexed
+                else:
+                    raise
+
+            # Build warning message if there were issues
+            warning_parts = []
+            if documents_failed > 0:
+                warning_parts.append(f"{documents_failed} failed")
+            warning_message = ", ".join(warning_parts) if warning_parts else None

            await task_logger.log_task_success(
                log_entry,
                f"Successfully indexed {documents_processed} documents from Elasticsearch",
-                {"documents_indexed": documents_processed, "index": index_name},
+                {
+                    "documents_indexed": documents_processed,
+                    "documents_skipped": documents_skipped,
+                    "documents_failed": documents_failed,
+                    "index": index_name,
+                },
            )
            logger.info(
-                f"Successfully indexed {documents_processed} documents from Elasticsearch"
+                f"Elasticsearch indexing completed: {documents_processed} ready, "
+                f"{documents_skipped} skipped, {documents_failed} failed"
            )

-            # Update last indexed timestamp if requested
-            if update_last_indexed and documents_processed > 0:
-                # connector.last_indexed_at = datetime.now()
-                connector.last_indexed_at = (
-                    datetime.now(UTC).isoformat().replace("+00:00", "Z")
-                )
-                await session.commit()
-                await task_logger.log_task_progress(
-                    log_entry,
-                    "Updated connector.last_indexed_at",
-                    {"last_indexed_at": connector.last_indexed_at},
-                )
-
-            return documents_processed, None
+            return documents_processed, warning_message

        finally:
            # Clean up Elasticsearch connection
--- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
@ -3,6 +3,10 @@ GitHub connector indexer using gitingest.

 This indexer processes entire repository digests in one pass, dramatically
 reducing LLM API calls compared to the previous file-by-file approach.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import time
@ -13,8 +17,8 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
-from app.connectors.github_connector import GitHubConnector, RepositoryDigest
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.connectors.github_connector import GitHubConnector
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -30,6 +34,8 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
+    update_connector_last_indexed,
 )

 # Type hint for heartbeat callback
@ -164,7 +170,7 @@ async def index_github_repos(
            )
            return 0, f"Failed to initialize GitHub client: {e!s}"

-        # 4. Process each repository with gitingest
+        # 4. Process each repository with gitingest using 2-phase approach
        await task_logger.log_task_progress(
            log_entry,
            f"Starting gitingest processing for {len(repo_full_names_to_index)} repositories",
@ -181,24 +187,25 @@ async def index_github_repos(
        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()
        documents_indexed = 0
+        documents_skipped = 0
+        documents_failed = 0
+
+        # =======================================================================
+        # PHASE 1: Analyze all repos and create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        repos_to_process = []  # List of dicts with document and digest data
+        new_documents_created = False

        for repo_full_name in repo_full_names_to_index:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
            if not repo_full_name or not isinstance(repo_full_name, str):
                logger.warning(f"Skipping invalid repository entry: {repo_full_name}")
                continue

-            logger.info(f"Ingesting repository: {repo_full_name}")
-
            try:
+                logger.info(f"Phase 1: Analyzing repository: {repo_full_name}")
+
                # Run gitingest via subprocess (isolated from event loop)
-                # Using to_thread to not block the async database operations
                import asyncio

                digest = await asyncio.to_thread(
@ -212,30 +219,266 @@ async def index_github_repos(
                    errors.append(f"No digest for {repo_full_name}")
                    continue

-                # Process the digest and create documents
-                docs_created = await _process_repository_digest(
-                    session=session,
-                    digest=digest,
-                    search_space_id=search_space_id,
-                    user_id=user_id,
-                    task_logger=task_logger,
-                    log_entry=log_entry,
-                    connector_id=connector_id,
+                # Generate unique identifier based on repo name
+                unique_identifier_hash = generate_unique_identifier_hash(
+                    DocumentType.GITHUB_CONNECTOR, repo_full_name, search_space_id
                )

-                documents_processed += docs_created
-                logger.info(
-                    f"Created {docs_created} documents from repository: {repo_full_name}"
+                # Generate content hash from digest
+                full_content = digest.full_digest
+                content_hash = generate_content_hash(full_content, search_space_id)
+
+                # Check if document with this unique identifier already exists
+                existing_document = await check_document_by_unique_identifier(
+                    session, unique_identifier_hash
+                )
+
+                if existing_document:
+                    # Document exists - check if content has changed
+                    if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
+                        logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
+                        documents_skipped += 1
+                        continue
+
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    logger.info(
+                        f"Content changed for repository {repo_full_name}. Queuing for update."
+                    )
+                    repos_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "digest": digest,
+                            "content_hash": content_hash,
+                            "repo_full_name": repo_full_name,
+                            "unique_identifier_hash": unique_identifier_hash,
+                        }
+                    )
+                    continue
+
+                # Document doesn't exist by unique_identifier_hash
+                # Check if a document with the same content_hash exists (from another connector)
+                with session.no_autoflush:
+                    duplicate_by_content = await check_duplicate_document_by_hash(
+                        session, content_hash
+                    )
+
+                if duplicate_by_content:
+                    logger.info(
+                        f"Repository {repo_full_name} already indexed by another connector "
+                        f"(existing document ID: {duplicate_by_content.id}, "
+                        f"type: {duplicate_by_content.document_type}). Skipping."
+                    )
+                    documents_skipped += 1
+                    continue
+
+                # Create new document with PENDING status (visible in UI immediately)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=repo_full_name,
+                    document_type=DocumentType.GITHUB_CONNECTOR,
+                    document_metadata={
+                        "repository_full_name": repo_full_name,
+                        "url": f"https://github.com/{repo_full_name}",
+                        "branch": digest.branch,
+                        "ingestion_method": "gitingest",
+                        "connector_id": connector_id,
+                    },
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
+                    updated_at=get_current_timestamp(),
+                    created_by_id=user_id,
+                    connector_id=connector_id,
+                )
+                session.add(document)
+                new_documents_created = True
+
+                repos_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "digest": digest,
+                        "content_hash": content_hash,
+                        "repo_full_name": repo_full_name,
+                        "unique_identifier_hash": unique_identifier_hash,
+                    }
                )

            except Exception as repo_err:
                logger.error(
-                    f"Failed to process repository {repo_full_name}: {repo_err}"
+                    f"Error in Phase 1 for repository {repo_full_name}: {repo_err}",
+                    exc_info=True,
                )
+                errors.append(f"Phase 1 error for {repo_full_name}: {repo_err}")
+                documents_failed += 1
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(repos_to_process)} documents")
+
+        for item in repos_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            digest = item["digest"]
+            repo_full_name = item["repo_full_name"]
+
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                document_metadata_for_summary = {
+                    "repository": repo_full_name,
+                    "document_type": "GitHub Repository",
+                    "connector_type": "GitHub",
+                    "ingestion_method": "gitingest",
+                    "file_tree": digest.tree[:2000]
+                    if len(digest.tree) > 2000
+                    else digest.tree,
+                    "estimated_tokens": digest.estimated_tokens,
+                }
+
+                if user_llm:
+                    # Prepare content for summarization
+                    summary_content = digest.full_digest
+                    if len(summary_content) > MAX_DIGEST_CHARS:
+                        summary_content = (
+                            f"# Repository: {repo_full_name}\n\n"
+                            f"## File Structure\n\n{digest.tree}\n\n"
+                            f"## File Contents (truncated)\n\n{digest.content[: MAX_DIGEST_CHARS - len(digest.tree) - 200]}..."
+                        )
+
+                    summary_text, summary_embedding = await generate_document_summary(
+                        summary_content, user_llm, document_metadata_for_summary
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    summary_text = (
+                        f"# GitHub Repository: {repo_full_name}\n\n"
+                        f"## Summary\n{digest.summary}\n\n"
+                        f"## File Structure\n{digest.tree[:3000]}"
+                    )
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_text
+                    )
+
+                # Chunk the full digest content for granular search
+                try:
+                    chunks_data = await create_document_chunks(digest.content)
+                except Exception as chunk_err:
+                    logger.error(
+                        f"Failed to chunk repository {repo_full_name}: {chunk_err}"
+                    )
+                    chunks_data = await _simple_chunk_content(digest.content)
+
+                # Update document to READY with actual content
+                doc_metadata = {
+                    "repository_full_name": repo_full_name,
+                    "url": f"https://github.com/{repo_full_name}",
+                    "branch": digest.branch,
+                    "ingestion_method": "gitingest",
+                    "file_tree": digest.tree,
+                    "gitingest_summary": digest.summary,
+                    "estimated_tokens": digest.estimated_tokens,
+                    "connector_id": connector_id,
+                    "indexed_at": datetime.now(UTC).isoformat(),
+                }
+
+                document.title = repo_full_name
+                document.content = summary_text
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = doc_metadata
+                safe_set_chunks(document, chunks_data)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_processed += 1
+                documents_indexed += 1
+
+                logger.info(
+                    f"Created document for repository {repo_full_name} "
+                    f"with {len(chunks_data)} chunks"
+                )
+
+                # Batch commit every 5 documents (repositories are large)
+                if documents_indexed % 5 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} GitHub repos processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as repo_err:
+                logger.error(
+                    f"Error processing repository {repo_full_name}: {repo_err}",
+                    exc_info=True,
+                )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(repo_err))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                errors.append(f"Failed processing {repo_full_name}: {repo_err}")
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit
-        await session.commit()
+        logger.info(
+            f"Final commit: Total {documents_processed} GitHub repositories processed"
+        )
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all GitHub document changes to database"
+            )
+        except Exception as e:
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
+
        logger.info(
            f"Finished GitHub indexing for connector {connector_id}. "
            f"Created {documents_processed} documents."
@ -247,6 +490,8 @@ async def index_github_repos(
            f"Successfully completed GitHub indexing for connector {connector_id}",
            {
                "documents_processed": documents_processed,
+                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                "errors_count": len(errors),
                "repo_count": len(repo_full_names_to_index),
                "method": "gitingest",
@ -286,163 +531,6 @@ async def index_github_repos(
    return documents_processed, error_message


-async def _process_repository_digest(
-    session: AsyncSession,
-    digest: RepositoryDigest,
-    search_space_id: int,
-    user_id: str,
-    task_logger: TaskLoggingService,
-    log_entry,
-    connector_id: int,
-) -> int:
-    """
-    Process a repository digest and create documents.
-
-    For each repository, we create:
-    1. One main document with the repository summary
-    2. Chunks from the full digest content for granular search
-
-    Args:
-        session: Database session
-        digest: The repository digest from gitingest
-        search_space_id: ID of the search space
-        user_id: ID of the user
-        task_logger: Task logging service
-        log_entry: Current log entry
-
-    Returns:
-        Number of documents created
-    """
-    repo_full_name = digest.repo_full_name
-    documents_created = 0
-
-    # Generate unique identifier based on repo name and content hash
-    # This allows updates when repo content changes
-    full_content = digest.full_digest
-    content_hash = generate_content_hash(full_content, search_space_id)
-
-    # Use repo name as the unique identifier (one document per repo)
-    unique_identifier_hash = generate_unique_identifier_hash(
-        DocumentType.GITHUB_CONNECTOR, repo_full_name, search_space_id
-    )
-
-    # Check if document with this unique identifier already exists
-    existing_document = await check_document_by_unique_identifier(
-        session, unique_identifier_hash
-    )
-
-    if existing_document:
-        # Document exists - check if content has changed
-        if existing_document.content_hash == content_hash:
-            logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
-            return 0
-        else:
-            logger.info(
-                f"Content changed for repository {repo_full_name}. Updating document."
-            )
-            # Delete existing document to replace with new one
-            await session.delete(existing_document)
-            await session.flush()
-    else:
-        # Document doesn't exist by unique_identifier_hash
-        # Check if a document with the same content_hash exists (from another connector)
-        with session.no_autoflush:
-            duplicate_by_content = await check_duplicate_document_by_hash(
-                session, content_hash
-            )
-
-        if duplicate_by_content:
-            logger.info(
-                f"Repository {repo_full_name} already indexed by another connector "
-                f"(existing document ID: {duplicate_by_content.id}, "
-                f"type: {duplicate_by_content.document_type}). Skipping."
-            )
-            return 0
-
-    # Generate summary using LLM (ONE call per repository!)
-    user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-
-    document_metadata = {
-        "repository": repo_full_name,
-        "document_type": "GitHub Repository",
-        "connector_type": "GitHub",
-        "ingestion_method": "gitingest",
-        "file_tree": digest.tree[:2000] if len(digest.tree) > 2000 else digest.tree,
-        "estimated_tokens": digest.estimated_tokens,
-    }
-
-    if user_llm:
-        # Prepare content for summarization
-        # Include tree structure and truncated content if too large
-        summary_content = digest.full_digest
-        if len(summary_content) > MAX_DIGEST_CHARS:
-            # Truncate but keep the tree and beginning of content
-            summary_content = (
-                f"# Repository: {repo_full_name}\n\n"
-                f"## File Structure\n\n{digest.tree}\n\n"
-                f"## File Contents (truncated)\n\n{digest.content[: MAX_DIGEST_CHARS - len(digest.tree) - 200]}..."
-            )
-
-        summary_text, summary_embedding = await generate_document_summary(
-            summary_content, user_llm, document_metadata
-        )
-    else:
-        # Fallback to simple summary if no LLM configured
-        summary_text = (
-            f"# GitHub Repository: {repo_full_name}\n\n"
-            f"## Summary\n{digest.summary}\n\n"
-            f"## File Structure\n{digest.tree[:3000]}"
-        )
-        summary_embedding = config.embedding_model_instance.embed(summary_text)
-
-    # Chunk the full digest content for granular search
-    try:
-        # Use the content (not the summary) for chunking
-        # This preserves file-level granularity in search
-        chunks_data = await create_document_chunks(digest.content)
-    except Exception as chunk_err:
-        logger.error(f"Failed to chunk repository {repo_full_name}: {chunk_err}")
-        # Fall back to a simpler chunking approach
-        chunks_data = await _simple_chunk_content(digest.content)
-
-    # Create the document
-    doc_metadata = {
-        "repository_full_name": repo_full_name,
-        "url": f"https://github.com/{repo_full_name}",
-        "branch": digest.branch,
-        "ingestion_method": "gitingest",
-        "file_tree": digest.tree,
-        "gitingest_summary": digest.summary,
-        "estimated_tokens": digest.estimated_tokens,
-        "indexed_at": datetime.now(UTC).isoformat(),
-    }
-
-    document = Document(
-        title=f"GitHub Repository: {repo_full_name}",
-        document_type=DocumentType.GITHUB_CONNECTOR,
-        document_metadata=doc_metadata,
-        content=summary_text,
-        content_hash=content_hash,
-        unique_identifier_hash=unique_identifier_hash,
-        embedding=summary_embedding,
-        search_space_id=search_space_id,
-        chunks=chunks_data,
-        updated_at=get_current_timestamp(),
-        created_by_id=user_id,
-        connector_id=connector_id,
-    )
-
-    session.add(document)
-    documents_created += 1
-
-    logger.info(
-        f"Created document for repository {repo_full_name} "
-        f"with {len(chunks_data)} chunks"
-    )
-
-    return documents_created
-
-
 async def _simple_chunk_content(content: str, chunk_size: int = 4000) -> list:
    """
    Simple fallback chunking when the regular chunker fails.
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@ -1,5 +1,9 @@
 """
 Google Calendar connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import time
@ -11,7 +15,7 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.connectors.google_calendar_connector import GoogleCalendarConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -28,6 +32,7 @@ from .base import (
    get_current_timestamp,
    logger,
    parse_date_flexible,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -305,7 +310,7 @@ async def index_google_calendar_events(

        documents_indexed = 0
        documents_skipped = 0
-        skipped_events = []
+        documents_failed = 0  # Track events that failed processing
        duplicate_content_count = (
            0  # Track events skipped due to duplicate content_hash
        )
@ -313,14 +318,14 @@ async def index_google_calendar_events(
        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

+        # =======================================================================
+        # PHASE 1: Analyze all events; create pending documents for new events and
+        # queue changed existing ones (new ones show in UI after the Phase 1 commit)
+        # =======================================================================
+        events_to_process = []  # List of dicts with document and event data
+        new_documents_created = False
+
        for event in events:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
            try:
                event_id = event.get("id")
                event_summary = event.get("summary", "No Title")
@ -328,14 +333,12 @@ async def index_google_calendar_events(

                if not event_id:
                    logger.warning(f"Skipping event with missing ID: {event_summary}")
-                    skipped_events.append(f"{event_summary} (missing ID)")
                    documents_skipped += 1
                    continue

                event_markdown = calendar_client.format_event_to_markdown(event)
                if not event_markdown.strip():
                    logger.warning(f"Skipping event with no content: {event_summary}")
-                    skipped_events.append(f"{event_summary} (no content)")
                    documents_skipped += 1
                    continue

@ -362,82 +365,31 @@ async def index_google_calendar_events(
                if existing_document:
                    # Document exists - check if content has changed
                    if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Google Calendar event {event_summary} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
                        documents_skipped += 1
                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Google Calendar event {event_summary}. Updating document."
-                        )

-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "event_id": event_id,
-                                "event_summary": event_summary,
-                                "calendar_id": calendar_id,
-                                "start_time": start_time,
-                                "end_time": end_time,
-                                "location": location or "No location",
-                                "document_type": "Google Calendar Event",
-                                "connector_type": "Google Calendar",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                event_markdown, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = (
-                                f"Google Calendar Event: {event_summary}\n\n"
-                            )
-                            summary_content += f"Calendar: {calendar_id}\n"
-                            summary_content += f"Start: {start_time}\n"
-                            summary_content += f"End: {end_time}\n"
-                            if location:
-                                summary_content += f"Location: {location}\n"
-                            if description:
-                                desc_preview = description[:1000]
-                                if len(description) > 1000:
-                                    desc_preview += "..."
-                                summary_content += f"Description: {desc_preview}\n"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(event_markdown)
-
-                        # Update existing document
-                        existing_document.title = f"Calendar Event - {event_summary}"
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    events_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "event_markdown": event_markdown,
+                            "content_hash": content_hash,
                            "event_id": event_id,
                            "event_summary": event_summary,
                            "calendar_id": calendar_id,
                            "start_time": start_time,
                            "end_time": end_time,
                            "location": location,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                            "description": description,
                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(
-                            f"Successfully updated Google Calendar event {event_summary}"
-                        )
-                        continue
+                    )
+                    continue

                # Document doesn't exist by unique_identifier_hash
                # Check if a document with the same content_hash exists (from another connector)
@ -455,55 +407,12 @@ async def index_google_calendar_events(
                    )
                    duplicate_content_count += 1
                    documents_skipped += 1
-                    skipped_events.append(
-                        f"{event_summary} (already indexed by another connector)"
-                    )
                    continue

-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "event_id": event_id,
-                        "event_summary": event_summary,
-                        "calendar_id": calendar_id,
-                        "start_time": start_time,
-                        "end_time": end_time,
-                        "location": location or "No location",
-                        "document_type": "Google Calendar Event",
-                        "connector_type": "Google Calendar",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        event_markdown, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    summary_content = f"Google Calendar Event: {event_summary}\n\n"
-                    summary_content += f"Calendar: {calendar_id}\n"
-                    summary_content += f"Start: {start_time}\n"
-                    summary_content += f"End: {end_time}\n"
-                    if location:
-                        summary_content += f"Location: {location}\n"
-                    if description:
-                        desc_preview = description[:1000]
-                        if len(description) > 1000:
-                            desc_preview += "..."
-                        summary_content += f"Description: {desc_preview}\n"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-                chunks = await create_document_chunks(event_markdown)
-
+                # Create new document with PENDING status (visible in UI immediately)
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Calendar Event - {event_summary}",
+                    title=event_summary,
                    document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR,
                    document_metadata={
                        "event_id": event_id,
@ -512,23 +421,133 @@ async def index_google_calendar_events(
                        "start_time": start_time,
                        "end_time": end_time,
                        "location": location,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                    },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                    updated_at=get_current_timestamp(),
                    created_by_id=user_id,
                    connector_id=connector_id,
                )
-
                session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new event {event_summary}")
+                new_documents_created = True

-                # Batch commit every 10 documents
+                events_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "event_markdown": event_markdown,
+                        "content_hash": content_hash,
+                        "event_id": event_id,
+                        "event_summary": event_summary,
+                        "calendar_id": calendar_id,
+                        "start_time": start_time,
+                        "end_time": end_time,
+                        "location": location,
+                        "description": description,
+                    }
+                )
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(events_to_process)} documents")
+
+        for item in events_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "event_id": item["event_id"],
+                        "event_summary": item["event_summary"],
+                        "calendar_id": item["calendar_id"],
+                        "start_time": item["start_time"],
+                        "end_time": item["end_time"],
+                        "location": item["location"] or "No location",
+                        "document_type": "Google Calendar Event",
+                        "connector_type": "Google Calendar",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["event_markdown"], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    summary_content = (
+                        f"Google Calendar Event: {item['event_summary']}\n\n"
+                    )
+                    summary_content += f"Calendar: {item['calendar_id']}\n"
+                    summary_content += f"Start: {item['start_time']}\n"
+                    summary_content += f"End: {item['end_time']}\n"
+                    if item["location"]:
+                        summary_content += f"Location: {item['location']}\n"
+                    if item["description"]:
+                        desc_preview = item["description"][:1000]
+                        if len(item["description"]) > 1000:
+                            desc_preview += "..."
+                        summary_content += f"Description: {desc_preview}\n"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item["event_markdown"])
+
+                # Update document to READY with actual content
+                document.title = item["event_summary"]
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "event_id": item["event_id"],
+                    "event_summary": item["event_summary"],
+                    "calendar_id": item["calendar_id"],
+                    "start_time": item["start_time"],
+                    "end_time": item["end_time"],
+                    "location": item["location"],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                if documents_indexed % 10 == 0:
                    logger.info(
                        f"Committing batch: {documents_indexed} Google Calendar events processed so far"
@ -536,19 +555,20 @@ async def index_google_calendar_events(
                    await session.commit()

            except Exception as e:
-                logger.error(
-                    f"Error processing event {event.get('summary', 'Unknown')}: {e!s}",
-                    exc_info=True,
-                )
-                skipped_events.append(
-                    f"{event.get('summary', 'Unknown')} (processing error)"
-                )
-                documents_skipped += 1
+                logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
                continue

-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(
@ -556,6 +576,9 @@ async def index_google_calendar_events(
        )
        try:
            await session.commit()
+            logger.info(
+                "Successfully committed all Google Calendar document changes to database"
+            )
        except Exception as e:
            # Handle any remaining integrity errors gracefully (race conditions, etc.)
            if (
@ -572,10 +595,15 @@ async def index_google_calendar_events(
            else:
                raise

-        # Build warning message if duplicates were found
-        warning_message = None
+        # Build warning message if there were issues
+        warning_parts = []
        if duplicate_content_count > 0:
-            warning_message = f"{duplicate_content_count} skipped (duplicate)"
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
+
+        total_processed = documents_indexed

        await task_logger.log_task_success(
            log_entry,
@ -584,14 +612,15 @@ async def index_google_calendar_events(
                "events_processed": total_processed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                "duplicate_content_count": duplicate_content_count,
-                "skipped_events_count": len(skipped_events),
            },
        )

        logger.info(
-            f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
-            f"({duplicate_content_count} due to duplicate content from other connectors)"
+            f"Google Calendar indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
        )
        return total_processed, warning_message

--- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
@ -1,4 +1,9 @@
-"""Google Drive indexer using Surfsense file processors."""
+"""Google Drive indexer using Surfsense file processors.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
+"""

 import logging
 import time
@ -17,11 +22,12 @@ from app.connectors.google_drive import (
    get_files_in_folder,
    get_start_page_token,
 )
-from app.db import DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.connector_indexers.base import (
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    update_connector_last_indexed,
 )
 from app.utils.document_converters import generate_unique_identifier_hash
@ -324,8 +330,29 @@ async def index_google_drive_single_file(
        display_name = file_name or file.get("name", "Unknown")
        logger.info(f"Indexing Google Drive file: {display_name} ({file_id})")

+        # Create pending document for status visibility
+        pending_doc, should_skip = await _create_pending_document_for_file(
+            session=session,
+            file=file,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+        )
+
+        if should_skip:
+            await task_logger.log_task_progress(
+                log_entry,
+                f"File {display_name} is unchanged or not indexable",
+                {"status": "skipped"},
+            )
+            return 0, None
+
+        # Commit pending document so it appears in UI
+        if pending_doc and pending_doc.id is None:
+            await session.commit()
+
        # Process the file
-        indexed, skipped = await _process_single_file(
+        indexed, skipped, failed = await _process_single_file(
            drive_client=drive_client,
            session=session,
            file=file,
@ -334,6 +361,7 @@ async def index_google_drive_single_file(
            user_id=user_id,
            task_logger=task_logger,
            log_entry=log_entry,
+            pending_document=pending_doc,
        )

        await session.commit()
@ -341,6 +369,15 @@ async def index_google_drive_single_file(
            "Successfully committed Google Drive file indexing changes to database"
        )

+        if failed > 0:
+            error_msg = f"Failed to index file {display_name}"
+            await task_logger.log_task_failure(
+                log_entry,
+                error_msg,
+                {"file_name": display_name, "file_id": file_id},
+            )
+            return 0, error_msg
+
        if indexed > 0:
            await task_logger.log_task_success(
                log_entry,
@ -397,7 +434,12 @@ async def _index_full_scan(
    include_subfolders: bool = False,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, int]:
-    """Perform full scan indexing of a folder."""
+    """Perform full scan indexing of a folder.
+
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Collect all files and create pending documents (visible in UI immediately)
+    - Phase 2: Process each file: pending → processing → ready/failed
+    """
    await task_logger.log_task_progress(
        log_entry,
        f"Starting full scan of folder: {folder_name} (include_subfolders={include_subfolders})",
@ -410,29 +452,31 @@ async def _index_full_scan(

    documents_indexed = 0
    documents_skipped = 0
+    documents_failed = 0
    files_processed = 0

    # Heartbeat tracking - update notification periodically to prevent appearing stuck
    last_heartbeat_time = time.time()

+    # =======================================================================
+    # PHASE 1: Collect all files and create pending documents
+    # This makes ALL documents visible in the UI immediately with pending status
+    # =======================================================================
+    files_to_process = []  # List of (file, pending_document or None)
+    new_documents_created = False
+
    # Queue of folders to process: (folder_id, folder_name)
    folders_to_process = [(folder_id, folder_name)]

+    logger.info("Phase 1: Collecting files and creating pending documents")
+
    while folders_to_process and files_processed < max_files:
-        # Check if it's time for a heartbeat update
-        if (
-            on_heartbeat_callback
-            and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-        ):
-            await on_heartbeat_callback(documents_indexed)
-            last_heartbeat_time = time.time()
        current_folder_id, current_folder_name = folders_to_process.pop(0)
-        logger.info(f"Processing folder: {current_folder_name} ({current_folder_id})")
+        logger.info(f"Scanning folder: {current_folder_name} ({current_folder_id})")
        page_token = None

        while files_processed < max_files:
            # Get files and folders in current folder
-            # include_subfolders=True here so we get folder items to queue them
            files, next_token, error = await get_files_in_folder(
                drive_client,
                current_folder_id,
@ -462,35 +506,74 @@ async def _index_full_scan(
                        logger.debug(f"Queued subfolder: {file.get('name', 'Unknown')}")
                    continue

-                # Process the file
                files_processed += 1

-                indexed, skipped = await _process_single_file(
-                    drive_client=drive_client,
+                # Create pending document for this file
+                pending_doc, should_skip = await _create_pending_document_for_file(
                    session=session,
                    file=file,
                    connector_id=connector_id,
                    search_space_id=search_space_id,
                    user_id=user_id,
-                    task_logger=task_logger,
-                    log_entry=log_entry,
                )

-                documents_indexed += indexed
-                documents_skipped += skipped
+                if should_skip:
+                    documents_skipped += 1
+                    continue

-                if documents_indexed % 10 == 0 and documents_indexed > 0:
-                    await session.commit()
-                    logger.info(
-                        f"Committed batch: {documents_indexed} files indexed so far"
-                    )
+                if pending_doc and pending_doc.id is None:
+                    # New document was created
+                    new_documents_created = True
+
+                files_to_process.append((file, pending_doc))

            page_token = next_token
            if not page_token:
                break

+    # Commit all pending documents - they all appear in UI now
+    if new_documents_created:
+        logger.info(
+            f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents"
+        )
+        await session.commit()
+
+    # =======================================================================
+    # PHASE 2: Process each file one by one
+    # Each document transitions: pending → processing → ready/failed
+    # =======================================================================
+    logger.info(f"Phase 2: Processing {len(files_to_process)} files")
+
+    for file, pending_doc in files_to_process:
+        # Check if it's time for a heartbeat update
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        indexed, skipped, failed = await _process_single_file(
+            drive_client=drive_client,
+            session=session,
+            file=file,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            task_logger=task_logger,
+            log_entry=log_entry,
+            pending_document=pending_doc,
+        )
+
+        documents_indexed += indexed
+        documents_skipped += skipped
+        documents_failed += failed
+
+        if documents_indexed % 10 == 0 and documents_indexed > 0:
+            await session.commit()
+            logger.info(f"Committed batch: {documents_indexed} files indexed so far")
+
    logger.info(
-        f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped"
+        f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed"
    )
    return documents_indexed, documents_skipped

@ -514,6 +597,10 @@ async def _index_with_delta_sync(

    Note: include_subfolders is accepted for API consistency but delta sync
    automatically tracks changes across all folders including subfolders.
+
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Collect all changes and create pending documents (visible in UI immediately)
+    - Phase 2: Process each file: pending → processing → ready/failed
    """
    await task_logger.log_task_progress(
        log_entry,
@ -537,19 +624,21 @@ async def _index_with_delta_sync(

    documents_indexed = 0
    documents_skipped = 0
+    documents_failed = 0
    files_processed = 0

    # Heartbeat tracking - update notification periodically to prevent appearing stuck
    last_heartbeat_time = time.time()

+    # =======================================================================
+    # PHASE 1: Analyze changes and create pending documents for new/modified files
+    # =======================================================================
+    changes_to_process = []  # List of (change, file, pending_document or None)
+    new_documents_created = False
+
+    logger.info("Phase 1: Analyzing changes and creating pending documents")
+
    for change in changes:
-        # Check if it's time for a heartbeat update
-        if (
-            on_heartbeat_callback
-            and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-        ):
-            await on_heartbeat_callback(documents_indexed)
-            last_heartbeat_time = time.time()
        if files_processed >= max_files:
            break

@ -566,7 +655,45 @@ async def _index_with_delta_sync(
        if not file:
            continue

-        indexed, skipped = await _process_single_file(
+        # Create pending document for this file
+        pending_doc, should_skip = await _create_pending_document_for_file(
+            session=session,
+            file=file,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+        )
+
+        if should_skip:
+            documents_skipped += 1
+            continue
+
+        if pending_doc and pending_doc.id is None:
+            # New document was created
+            new_documents_created = True
+
+        changes_to_process.append((change, file, pending_doc))
+
+    # Commit all pending documents - they all appear in UI now
+    if new_documents_created:
+        logger.info("Phase 1: Committing pending documents")
+        await session.commit()
+
+    # =======================================================================
+    # PHASE 2: Process each file one by one
+    # Each document transitions: pending → processing → ready/failed
+    # =======================================================================
+    logger.info(f"Phase 2: Processing {len(changes_to_process)} changes")
+
+    for _, file, pending_doc in changes_to_process:
+        # Check if it's time for a heartbeat update
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        indexed, skipped, failed = await _process_single_file(
            drive_client=drive_client,
            session=session,
            file=file,
@ -575,21 +702,125 @@ async def _index_with_delta_sync(
            user_id=user_id,
            task_logger=task_logger,
            log_entry=log_entry,
+            pending_document=pending_doc,
        )

        documents_indexed += indexed
        documents_skipped += skipped
+        documents_failed += failed

        if documents_indexed % 10 == 0 and documents_indexed > 0:
            await session.commit()
            logger.info(f"Committed batch: {documents_indexed} changes processed")

    logger.info(
-        f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped"
+        f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed"
    )
    return documents_indexed, documents_skipped


+async def _create_pending_document_for_file(
+    session: AsyncSession,
+    file: dict,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+) -> tuple[Document | None, bool]:
+    """
+    Create a pending document for a Google Drive file if it doesn't exist.
+
+    This is Phase 1 of the 2-phase document status update pattern.
+    Adds a new document with 'pending' status to the session; it becomes visible in the UI once the caller commits.
+
+    Args:
+        session: Database session
+        file: File metadata from Google Drive API
+        connector_id: ID of the Drive connector
+        search_space_id: ID of the search space
+        user_id: ID of the user
+
+    Returns:
+        Tuple of (document, should_skip):
+        - (existing_doc, False): Existing document that needs update
+        - (new_pending_doc, False): New pending document created
+        - (None, True): File should be skipped (rejected by should_skip_file, missing file ID, or content unchanged, including rename-only changes)
+    """
+    from app.connectors.google_drive.file_types import should_skip_file
+
+    file_id = file.get("id")
+    file_name = file.get("name", "Unknown")
+    mime_type = file.get("mimeType", "")
+
+    # Skip folders and shortcuts
+    if should_skip_file(mime_type):
+        return None, True
+
+    if not file_id:
+        return None, True
+
+    # Generate unique identifier hash for this file
+    unique_identifier_hash = generate_unique_identifier_hash(
+        DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
+    )
+
+    # Check if document exists
+    existing_document = await check_document_by_unique_identifier(
+        session, unique_identifier_hash
+    )
+
+    if existing_document:
+        # Check if this is a rename-only update (content unchanged)
+        incoming_md5 = file.get("md5Checksum")
+        incoming_modified_time = file.get("modifiedTime")
+        doc_metadata = existing_document.document_metadata or {}
+        stored_md5 = doc_metadata.get("md5_checksum")
+        stored_modified_time = doc_metadata.get("modified_time")
+
+        # Determine if content changed
+        content_unchanged = False
+        if incoming_md5 and stored_md5:
+            content_unchanged = incoming_md5 == stored_md5
+        elif not incoming_md5 and incoming_modified_time and stored_modified_time:
+            # Google Workspace file - use modifiedTime as fallback
+            content_unchanged = incoming_modified_time == stored_modified_time
+
+        if content_unchanged:
+            # Ensure status is ready (might have been stuck in processing/pending)
+            if not DocumentStatus.is_state(
+                existing_document.status, DocumentStatus.READY
+            ):
+                existing_document.status = DocumentStatus.ready()
+            return None, True
+
+        # Content changed - return existing document for update
+        return existing_document, False
+
+    # Create new pending document
+    document = Document(
+        search_space_id=search_space_id,
+        title=file_name,
+        document_type=DocumentType.GOOGLE_DRIVE_FILE,
+        document_metadata={
+            "google_drive_file_id": file_id,
+            "google_drive_file_name": file_name,
+            "google_drive_mime_type": mime_type,
+            "connector_id": connector_id,
+        },
+        content="Pending...",  # Placeholder until processed
+        content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+        unique_identifier_hash=unique_identifier_hash,
+        embedding=None,
+        chunks=[],  # Empty at creation
+        status=DocumentStatus.pending(),  # Pending until processing starts
+        updated_at=get_current_timestamp(),
+        created_by_id=user_id,
+        connector_id=connector_id,
+    )
+    session.add(document)
+
+    return document, False
+
+
 async def _check_rename_only_update(
    session: AsyncSession,
    file: dict,
@ -725,15 +956,31 @@ async def _process_single_file(
    user_id: str,
    task_logger: TaskLoggingService,
    log_entry: any,
-) -> tuple[int, int]:
+    pending_document: Document | None = None,
+) -> tuple[int, int, int]:
    """
    Process a single file by downloading and using Surfsense's file processor.

+    Implements Phase 2 of the 2-phase document status update pattern.
+    Updates document status: pending → processing → ready/failed
+
+    Args:
+        drive_client: Google Drive client
+        session: Database session
+        file: File metadata from Google Drive API
+        connector_id: ID of the connector
+        search_space_id: ID of the search space
+        user_id: ID of the user
+        task_logger: Task logging service
+        log_entry: Log entry for tracking
+        pending_document: Optional pending document created in Phase 1
+
    Returns:
-        Tuple of (indexed_count, skipped_count)
+        Tuple of (indexed_count, skipped_count, failed_count)
    """
    file_name = file.get("name", "Unknown")
    mime_type = file.get("mimeType", "")
+    file_id = file.get("id")

    try:
        logger.info(f"Processing file: {file_name} ({mime_type})")
@ -756,10 +1003,15 @@ async def _process_single_file(
            # Return 1 for renamed files (they are "indexed" in the sense that they're updated)
            # Return 0 for unchanged files
            if "renamed" in (rename_message or "").lower():
-                return 1, 0
-            return 0, 1
+                return 1, 0, 0
+            return 0, 1, 0

-        _, error, _ = await download_and_process_file(
+        # Set document to PROCESSING status if we have a pending document
+        if pending_document:
+            pending_document.status = DocumentStatus.processing()
+            await session.commit()
+
+        _, error, metadata = await download_and_process_file(
            client=drive_client,
            file=file,
            search_space_id=search_space_id,
@ -776,14 +1028,46 @@ async def _process_single_file(
                f"Skipped {file_name}: {error}",
                {"status": "skipped", "reason": error},
            )
-            return 0, 1
+            # Mark pending document as failed if it exists (the file is still counted as skipped in the return value)
+            if pending_document:
+                pending_document.status = DocumentStatus.failed(error)
+                pending_document.updated_at = get_current_timestamp()
+                await session.commit()
+            return 0, 1, 0
+
+        # The document was created/updated by download_and_process_file
+        # Find the document and ensure it has READY status
+        if file_id:
+            unique_identifier_hash = generate_unique_identifier_hash(
+                DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
+            )
+            processed_doc = await check_document_by_unique_identifier(
+                session, unique_identifier_hash
+            )
+            # Ensure status is READY
+            if processed_doc and not DocumentStatus.is_state(
+                processed_doc.status, DocumentStatus.READY
+            ):
+                processed_doc.status = DocumentStatus.ready()
+                processed_doc.updated_at = get_current_timestamp()
+                await session.commit()

        logger.info(f"Successfully indexed Google Drive file: {file_name}")
-        return 1, 0
+        return 1, 0, 0

    except Exception as e:
        logger.error(f"Error processing file {file_name}: {e!s}", exc_info=True)
-        return 0, 1
+        # Mark pending document as failed if it exists
+        if pending_document:
+            try:
+                pending_document.status = DocumentStatus.failed(str(e))
+                pending_document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception as status_error:
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
+        return 0, 0, 1


 async def _remove_document(session: AsyncSession, file_id: str, search_space_id: int):
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@ -1,5 +1,9 @@
 """
 Google Gmail connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import time
@ -13,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.connectors.google_gmail_connector import GoogleGmailConnector
 from app.db import (
    Document,
+    DocumentStatus,
    DocumentType,
    SearchSourceConnectorType,
 )
@ -32,6 +37,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -220,20 +226,23 @@ async def index_google_gmail_messages(
        logger.info(f"Found {len(messages)} Google gmail messages to index")

        documents_indexed = 0
-        skipped_messages = []
        documents_skipped = 0
+        documents_failed = 0  # Track messages that failed processing
+        duplicate_content_count = (
+            0  # Track messages skipped due to duplicate content_hash
+        )

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

+        # =======================================================================
+        # PHASE 1: Analyze all messages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        messages_to_process = []  # List of dicts with document and message data
+        new_documents_created = False
+
        for message in messages:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
            try:
                # Extract message information
                message_id = message.get("id", "")
@ -259,7 +268,6 @@ async def index_google_gmail_messages(

                if not message_id:
                    logger.warning(f"Skipping message with missing ID: {subject}")
-                    skipped_messages.append(f"{subject} (missing ID)")
                    documents_skipped += 1
                    continue

@ -268,7 +276,6 @@ async def index_google_gmail_messages(

                if not markdown_content.strip():
                    logger.warning(f"Skipping message with no content: {subject}")
-                    skipped_messages.append(f"{subject} (no content)")
                    documents_skipped += 1
                    continue

@ -288,68 +295,29 @@ async def index_google_gmail_messages(
                if existing_document:
                    # Document exists - check if content has changed
                    if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Gmail message {subject} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
                        documents_skipped += 1
                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Gmail message {subject}. Updating document."
-                        )

-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "message_id": message_id,
-                                "thread_id": thread_id,
-                                "subject": subject,
-                                "sender": sender,
-                                "date": date_str,
-                                "document_type": "Gmail Message",
-                                "connector_type": "Google Gmail",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                markdown_content, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = f"Google Gmail Message: {subject}\n\n"
-                            summary_content += f"Sender: {sender}\n"
-                            summary_content += f"Date: {date_str}\n"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(markdown_content)
-
-                        # Update existing document
-                        existing_document.title = f"Gmail: {subject}"
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    messages_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "markdown_content": markdown_content,
+                            "content_hash": content_hash,
                            "message_id": message_id,
                            "thread_id": thread_id,
                            "subject": subject,
                            "sender": sender,
-                            "date": date_str,
-                            "connector_id": connector_id,
+                            "date_str": date_str,
                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(f"Successfully updated Gmail message {subject}")
-                        continue
+                    )
+                    continue

                # Document doesn't exist by unique_identifier_hash
                # Check if a document with the same content_hash exists (from another connector)
@ -364,48 +332,14 @@ async def index_google_gmail_messages(
                        f"(existing document ID: {duplicate_by_content.id}, "
                        f"type: {duplicate_by_content.document_type}). Skipping."
                    )
+                    duplicate_content_count += 1
                    documents_skipped += 1
                    continue

-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "message_id": message_id,
-                        "thread_id": thread_id,
-                        "subject": subject,
-                        "sender": sender,
-                        "date": date_str,
-                        "document_type": "Gmail Message",
-                        "connector_type": "Google Gmail",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        markdown_content, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    summary_content = f"Google Gmail Message: {subject}\n\n"
-                    summary_content += f"Sender: {sender}\n"
-                    summary_content += f"Date: {date_str}\n"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                # Process chunks
-                chunks = await create_document_chunks(markdown_content)
-
-                # Create and store new document
-                logger.info(f"Creating new document for Gmail message: {subject}")
+                # Create new document with PENDING status (visible in UI immediately)
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Gmail: {subject}",
+                    title=subject,
                    document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
                    document_metadata={
                        "message_id": message_id,
@ -413,21 +347,120 @@ async def index_google_gmail_messages(
                        "subject": subject,
                        "sender": sender,
                        "date": date_str,
+                        "connector_id": connector_id,
                    },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                    updated_at=get_current_timestamp(),
                    created_by_id=user_id,
                    connector_id=connector_id,
                )
                session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new email {summary_content}")
+                new_documents_created = True

-                # Batch commit every 10 documents
+                messages_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "message_id": message_id,
+                        "thread_id": thread_id,
+                        "subject": subject,
+                        "sender": sender,
+                        "date_str": date_str,
+                    }
+                )
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+
+        for item in messages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "message_id": item["message_id"],
+                        "thread_id": item["thread_id"],
+                        "subject": item["subject"],
+                        "sender": item["sender"],
+                        "date": item["date_str"],
+                        "document_type": "Gmail Message",
+                        "connector_type": "Google Gmail",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["markdown_content"],
+                        user_llm,
+                        document_metadata_for_summary,
+                    )
+                else:
+                    summary_content = f"Google Gmail Message: {item['subject']}\n\n"
+                    summary_content += f"Sender: {item['sender']}\n"
+                    summary_content += f"Date: {item['date_str']}\n"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item["markdown_content"])
+
+                # Update document to READY with actual content
+                document.title = item["subject"]
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "message_id": item["message_id"],
+                    "thread_id": item["thread_id"],
+                    "subject": item["subject"],
+                    "sender": item["sender"],
+                    "date": item["date_str"],
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                if documents_indexed % 10 == 0:
                    logger.info(
                        f"Committing batch: {documents_indexed} Gmail messages processed so far"
@ -435,45 +468,76 @@ async def index_google_gmail_messages(
                    await session.commit()

            except Exception as e:
-                logger.error(
-                    f"Error processing the email {message_id}: {e!s}",
-                    exc_info=True,
-                )
-                skipped_messages.append(f"{subject} (processing error)")
-                documents_skipped += 1
-                continue  # Skip this message and continue with others
+                logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
+                continue

-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed")
-        await session.commit()
-        logger.info(
-            "Successfully committed all Google gmail document changes to database"
-        )
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Google Gmail document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same message was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
+
+        total_processed = documents_indexed

        # Log success
        await task_logger.log_task_success(
            log_entry,
-            f"Successfully completed Google gmail indexing for connector {connector_id}",
+            f"Successfully completed Google Gmail indexing for connector {connector_id}",
            {
                "events_processed": total_processed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
-                "skipped_messages_count": len(skipped_messages),
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
            },
        )

        logger.info(
-            f"Google gmail indexing completed: {documents_indexed} new emails, {documents_skipped} skipped"
+            f"Google Gmail indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
        )
        return (
            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+            warning_message,
+        )  # Return warning_message (None on success)

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@ -1,5 +1,9 @@
 """
 Jira connector indexer.
+
+Provides real-time document status updates during indexing using a two-phase approach:
+- Phase 1: Create all documents with PENDING status (visible in UI immediately)
+- Phase 2: Process each document one by one (PENDING → PROCESSING → READY/FAILED)
 """

 import contextlib
@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.jira_history import JiraHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -29,6 +33,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -174,22 +179,22 @@ async def index_jira_issues(
            logger.error(f"Error fetching Jira issues: {e!s}", exc_info=True)
            return 0, f"Error fetching Jira issues: {e!s}"

-        # Process and index each issue
+        # =======================================================================
+        # PHASE 1: Analyze all issues, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
        documents_indexed = 0
-        skipped_issues = []
        documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

+        issues_to_process = []  # List of dicts with document and issue data
+        new_documents_created = False
+
        for issue in issues:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
            try:
                issue_id = issue.get("key")
                issue_identifier = issue.get("key", "")
@ -199,9 +204,6 @@ async def index_jira_issues(
                    logger.warning(
                        f"Skipping issue with missing ID or title: {issue_id or 'Unknown'}"
                    )
-                    skipped_issues.append(
-                        f"{issue_identifier or 'Unknown'} (missing data)"
-                    )
                    documents_skipped += 1
                    continue

@ -215,7 +217,6 @@ async def index_jira_issues(
                    logger.warning(
                        f"Skipping issue with no content: {issue_identifier} - {issue_title}"
                    )
-                    skipped_issues.append(f"{issue_identifier} (no content)")
                    documents_skipped += 1
                    continue

@ -237,73 +238,29 @@ async def index_jira_issues(
                if existing_document:
                    # Document exists - check if content has changed
                    if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Jira issue {issue_identifier} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
                        documents_skipped += 1
                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Jira issue {issue_identifier}. Updating document."
-                        )

-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "issue_key": issue_identifier,
-                                "issue_title": issue_title,
-                                "status": formatted_issue.get("status", "Unknown"),
-                                "priority": formatted_issue.get("priority", "Unknown"),
-                                "comment_count": comment_count,
-                                "document_type": "Jira Issue",
-                                "connector_type": "Jira",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                issue_content, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = f"Jira Issue {issue_identifier}: {issue_title}\n\nStatus: {formatted_issue.get('status', 'Unknown')}\n\n"
-                            if formatted_issue.get("description"):
-                                summary_content += f"Description: {formatted_issue.get('description')}\n\n"
-                            summary_content += f"Comments: {comment_count}"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(issue_content)
-
-                        # Update existing document
-                        existing_document.title = (
-                            f"Jira - {issue_identifier}: {issue_title}"
-                        )
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    issues_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "issue_content": issue_content,
+                            "content_hash": content_hash,
                            "issue_id": issue_id,
                            "issue_identifier": issue_identifier,
                            "issue_title": issue_title,
-                            "state": formatted_issue.get("status", "Unknown"),
+                            "formatted_issue": formatted_issue,
                            "comment_count": comment_count,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(
-                            f"Successfully updated Jira issue {issue_identifier}"
-                        )
-                        continue
+                    )
+                    continue

                # Document doesn't exist by unique_identifier_hash
                # Check if a document with the same content_hash exists (from another connector)
@ -318,53 +275,14 @@ async def index_jira_issues(
                        f"(existing document ID: {duplicate_by_content.id}, "
                        f"type: {duplicate_by_content.document_type}). Skipping."
                    )
+                    duplicate_content_count += 1
                    documents_skipped += 1
                    continue

-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "issue_key": issue_identifier,
-                        "issue_title": issue_title,
-                        "status": formatted_issue.get("status", "Unknown"),
-                        "priority": formatted_issue.get("priority", "Unknown"),
-                        "comment_count": comment_count,
-                        "document_type": "Jira Issue",
-                        "connector_type": "Jira",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        issue_content, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    summary_content = f"Jira Issue {issue_identifier}: {issue_title}\n\nStatus: {formatted_issue.get('status', 'Unknown')}\n\n"
-                    if formatted_issue.get("description"):
-                        summary_content += (
-                            f"Description: {formatted_issue.get('description')}\n\n"
-                        )
-                    summary_content += f"Comments: {comment_count}"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                # Process chunks - using the full issue content with comments
-                chunks = await create_document_chunks(issue_content)
-
-                # Create and store new document
-                logger.info(
-                    f"Creating new document for issue {issue_identifier} - {issue_title}"
-                )
+                # Create new document with PENDING status (visible in UI immediately)
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Jira - {issue_identifier}: {issue_title}",
+                    title=f"{issue_identifier}: {issue_title}",
                    document_type=DocumentType.JIRA_CONNECTOR,
                    document_metadata={
                        "issue_id": issue_id,
@ -372,25 +290,122 @@ async def index_jira_issues(
                        "issue_title": issue_title,
                        "state": formatted_issue.get("status", "Unknown"),
                        "comment_count": comment_count,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                    },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                    updated_at=get_current_timestamp(),
                    created_by_id=user_id,
                    connector_id=connector_id,
                )
-
                session.add(document)
-                documents_indexed += 1
-                logger.info(
-                    f"Successfully indexed new issue {issue_identifier} - {issue_title}"
+                new_documents_created = True
+
+                issues_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "issue_content": issue_content,
+                        "content_hash": content_hash,
+                        "issue_id": issue_id,
+                        "issue_identifier": issue_identifier,
+                        "issue_title": issue_title,
+                        "formatted_issue": formatted_issue,
+                        "comment_count": comment_count,
+                    }
                )

-                # Batch commit every 10 documents
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(issues_to_process)} documents")
+
+        for item in issues_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata = {
+                        "issue_key": item["issue_identifier"],
+                        "issue_title": item["issue_title"],
+                        "status": item["formatted_issue"].get("status", "Unknown"),
+                        "priority": item["formatted_issue"].get("priority", "Unknown"),
+                        "comment_count": item["comment_count"],
+                        "document_type": "Jira Issue",
+                        "connector_type": "Jira",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["issue_content"], user_llm, document_metadata
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n"
+                    if item["formatted_issue"].get("description"):
+                        summary_content += f"Description: {item['formatted_issue'].get('description')}\n\n"
+                    summary_content += f"Comments: {item['comment_count']}"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                # Process chunks - using the full issue content with comments
+                chunks = await create_document_chunks(item["issue_content"])
+
+                # Update document to READY with actual content
+                document.title = f"{item['issue_identifier']}: {item['issue_title']}"
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "issue_id": item["issue_id"],
+                    "issue_identifier": item["issue_identifier"],
+                    "issue_title": item["issue_title"],
+                    "state": item["formatted_issue"].get("status", "Unknown"),
+                    "comment_count": item["comment_count"],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                if documents_indexed % 10 == 0:
                    logger.info(
                        f"Committing batch: {documents_indexed} Jira issues processed so far"
@ -399,48 +414,75 @@ async def index_jira_issues(

            except Exception as e:
                logger.error(
-                    f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}",
+                    f"Error processing issue {item.get('issue_identifier', 'Unknown')}: {e!s}",
                    exc_info=True,
                )
-                skipped_issues.append(
-                    f"{issue.get('identifier', 'Unknown')} (processing error)"
-                )
-                documents_skipped += 1
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
                continue  # Skip this issue and continue with others

-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if update_last_indexed:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)

-        # Final commit for any remaining documents not yet committed in batches
+        # Final commit to ensure all documents are persisted (safety net)
        logger.info(f"Final commit: Total {documents_indexed} Jira issues processed")
-        await session.commit()
-        logger.info("Successfully committed all JIRA document changes to database")
+        try:
+            await session.commit()
+            logger.info("Successfully committed all JIRA document changes to database")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same issue was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        # Log success
        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed JIRA indexing for connector {connector_id}",
            {
-                "issues_processed": total_processed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
-                "skipped_issues_count": len(skipped_issues),
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
            },
        )

        logger.info(
-            f"JIRA indexing completed: {documents_indexed} new issues, {documents_skipped} skipped"
+            f"JIRA indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
        )

        # Clean up the connector
        await jira_client.close()

-        return (
-            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+        return documents_indexed, warning_message

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@ -1,5 +1,9 @@
 """
 Linear connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import time
@ -11,7 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.linear_connector import LinearConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -28,6 +32,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -196,6 +201,7 @@ async def index_linear_issues(
        # Track the number of documents indexed
        documents_indexed = 0
        documents_skipped = 0
+        documents_failed = 0  # Track issues that failed processing
        skipped_issues = []

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
@ -207,16 +213,14 @@ async def index_linear_issues(
            {"stage": "process_issues", "total_issues": len(issues)},
        )

-        # Process each issue
-        for issue in issues:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
+        # =======================================================================
+        # PHASE 1: Analyze all issues, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        issues_to_process = []  # List of dicts with document and issue data
+        new_documents_created = False

+        for issue in issues:
            try:
                issue_id = issue.get("id", "")
                issue_identifier = issue.get("identifier", "")
@ -262,80 +266,39 @@ async def index_linear_issues(
                state = formatted_issue.get("state", "Unknown")
                description = formatted_issue.get("description", "")
                comment_count = len(formatted_issue.get("comments", []))
+                priority = formatted_issue.get("priority", "Unknown")

                if existing_document:
                    # Document exists - check if content has changed
                    if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
                        logger.info(
                            f"Document for Linear issue {issue_identifier} unchanged. Skipping."
                        )
                        documents_skipped += 1
                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Linear issue {issue_identifier}. Updating document."
-                        )

-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "issue_id": issue_identifier,
-                                "issue_title": issue_title,
-                                "state": state,
-                                "priority": formatted_issue.get("priority", "Unknown"),
-                                "comment_count": comment_count,
-                                "document_type": "Linear Issue",
-                                "connector_type": "Linear",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                issue_content, user_llm, document_metadata
-                            )
-                        else:
-                            # Fallback to simple summary if no LLM configured
-                            if description and len(description) > 1000:
-                                description = description[:997] + "..."
-                            summary_content = f"Linear Issue {issue_identifier}: {issue_title}\n\nStatus: {state}\n\n"
-                            if description:
-                                summary_content += f"Description: {description}\n\n"
-                            summary_content += f"Comments: {comment_count}"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(issue_content)
-
-                        # Update existing document
-                        existing_document.title = (
-                            f"Linear - {issue_identifier}: {issue_title}"
-                        )
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    issues_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "issue_content": issue_content,
+                            "content_hash": content_hash,
                            "issue_id": issue_id,
                            "issue_identifier": issue_identifier,
                            "issue_title": issue_title,
                            "state": state,
+                            "description": description,
                            "comment_count": comment_count,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                            "priority": priority,
                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(
-                            f"Successfully updated Linear issue {issue_identifier}"
-                        )
-                        continue
+                    )
+                    continue

                # Document doesn't exist by unique_identifier_hash
                # Check if a document with the same content_hash exists (from another connector)
@ -353,51 +316,10 @@ async def index_linear_issues(
                    documents_skipped += 1
                    continue

-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "issue_id": issue_identifier,
-                        "issue_title": issue_title,
-                        "state": state,
-                        "priority": formatted_issue.get("priority", "Unknown"),
-                        "comment_count": comment_count,
-                        "document_type": "Linear Issue",
-                        "connector_type": "Linear",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        issue_content, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    # Truncate description if it's too long for the summary
-                    if description and len(description) > 1000:
-                        description = description[:997] + "..."
-                    summary_content = f"Linear Issue {issue_identifier}: {issue_title}\n\nStatus: {state}\n\n"
-                    if description:
-                        summary_content += f"Description: {description}\n\n"
-                    summary_content += f"Comments: {comment_count}"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                # Process chunks - using the full issue content with comments
-                chunks = await create_document_chunks(issue_content)
-
-                # Create and store new document
-                logger.info(
-                    f"Creating new document for issue {issue_identifier} - {issue_title}"
-                )
+                # Create new document with PENDING status (visible in UI immediately)
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Linear - {issue_identifier}: {issue_title}",
+                    title=f"{issue_identifier}: {issue_title}",
                    document_type=DocumentType.LINEAR_CONNECTOR,
                    document_metadata={
                        "issue_id": issue_id,
@ -405,25 +327,126 @@ async def index_linear_issues(
                        "issue_title": issue_title,
                        "state": state,
                        "comment_count": comment_count,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                    },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                    updated_at=get_current_timestamp(),
                    created_by_id=user_id,
                    connector_id=connector_id,
                )
-
                session.add(document)
-                documents_indexed += 1
-                logger.info(
-                    f"Successfully indexed new issue {issue_identifier} - {issue_title}"
+                new_documents_created = True
+
+                issues_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "issue_content": issue_content,
+                        "content_hash": content_hash,
+                        "issue_id": issue_id,
+                        "issue_identifier": issue_identifier,
+                        "issue_title": issue_title,
+                        "state": state,
+                        "description": description,
+                        "comment_count": comment_count,
+                        "priority": priority,
+                    }
                )

-                # Batch commit every 10 documents
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(issues_to_process)} documents")
+
+        for item in issues_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "issue_id": item["issue_identifier"],
+                        "issue_title": item["issue_title"],
+                        "state": item["state"],
+                        "priority": item["priority"],
+                        "comment_count": item["comment_count"],
+                        "document_type": "Linear Issue",
+                        "connector_type": "Linear",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["issue_content"], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    description = item["description"]
+                    if description and len(description) > 1000:
+                        description = description[:997] + "..."
+                    summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n"
+                    if description:
+                        summary_content += f"Description: {description}\n\n"
+                    summary_content += f"Comments: {item['comment_count']}"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item["issue_content"])
+
+                # Update document to READY with actual content
+                document.title = f"{item['issue_identifier']}: {item['issue_title']}"
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "issue_id": item["issue_id"],
+                    "issue_identifier": item["issue_identifier"],
+                    "issue_title": item["issue_title"],
+                    "state": item["state"],
+                    "comment_count": item["comment_count"],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                if documents_indexed % 10 == 0:
                    logger.info(
                        f"Committing batch: {documents_indexed} Linear issues processed so far"
@ -432,44 +455,72 @@ async def index_linear_issues(

            except Exception as e:
                logger.error(
-                    f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}",
+                    f"Error processing issue {item.get('issue_identifier', 'Unknown')}: {e!s}",
                    exc_info=True,
                )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                skipped_issues.append(
-                    f"{issue.get('identifier', 'Unknown')} (processing error)"
+                    f"{item.get('issue_identifier', 'Unknown')} (processing error)"
                )
-                documents_skipped += 1
-                continue  # Skip this issue and continue with others
+                documents_failed += 1
+                continue

-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if update_last_indexed:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(f"Final commit: Total {documents_indexed} Linear issues processed")
-        await session.commit()
-        logger.info("Successfully committed all Linear document changes to database")
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Linear document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same issue was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        # Log success
        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Linear indexing for connector {connector_id}",
            {
-                "issues_processed": total_processed,
+                "issues_processed": documents_indexed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                "skipped_issues_count": len(skipped_issues),
            },
        )

        logger.info(
-            f"Linear indexing completed: {documents_indexed} new issues, {documents_skipped} skipped"
+            f"Linear indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed"
        )
-        return (
-            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+        return documents_indexed, warning_message

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@ -1,5 +1,9 @@
 """
 Luma connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Collect all events and create pending documents (visible in UI immediately)
+- Phase 2: Process each event: pending → processing → ready/failed
 """

 import time
@ -11,7 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.luma_connector import LumaConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -27,6 +31,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -227,21 +232,22 @@ async def index_luma_events(
            logger.error(f"Error fetching Luma events: {e!s}", exc_info=True)
            return 0, f"Error fetching Luma events: {e!s}"

+        # =======================================================================
+        # PHASE 1: Analyze all events, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
        documents_indexed = 0
        documents_skipped = 0
+        documents_failed = 0
        skipped_events = []

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

+        events_to_process = []  # List of dicts with document and event data
+        new_documents_created = False
+
        for event in events:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
            try:
                # Luma event structure fields - events have nested 'event' field
                event_data = event.get("event", {})
@ -298,91 +304,38 @@ async def index_luma_events(
                if existing_document:
                    # Document exists - check if content has changed
                    if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
                        logger.info(
                            f"Document for Luma event {event_name} unchanged. Skipping."
                        )
                        documents_skipped += 1
                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Luma event {event_name}. Updating document."
-                        )

-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "event_id": event_id,
-                                "event_name": event_name,
-                                "event_url": event_url,
-                                "start_at": start_at,
-                                "end_at": end_at,
-                                "timezone": timezone,
-                                "location": location or "No location",
-                                "city": city,
-                                "hosts": host_names,
-                                "document_type": "Luma Event",
-                                "connector_type": "Luma",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                event_markdown, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = f"Luma Event: {event_name}\n\n"
-                            if event_url:
-                                summary_content += f"URL: {event_url}\n"
-                            summary_content += f"Start: {start_at}\n"
-                            summary_content += f"End: {end_at}\n"
-                            if timezone:
-                                summary_content += f"Timezone: {timezone}\n"
-                            if location:
-                                summary_content += f"Location: {location}\n"
-                            if city:
-                                summary_content += f"City: {city}\n"
-                            if host_names:
-                                summary_content += f"Hosts: {host_names}\n"
-                            if description:
-                                desc_preview = description[:1000]
-                                if len(description) > 1000:
-                                    desc_preview += "..."
-                                summary_content += f"Description: {desc_preview}\n"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(event_markdown)
-
-                        # Update existing document
-                        existing_document.title = f"Luma Event - {event_name}"
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    events_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
                            "event_id": event_id,
                            "event_name": event_name,
                            "event_url": event_url,
+                            "event_markdown": event_markdown,
+                            "content_hash": content_hash,
                            "start_at": start_at,
                            "end_at": end_at,
                            "timezone": timezone,
                            "location": location,
                            "city": city,
-                            "hosts": host_names,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                            "host_names": host_names,
+                            "description": description,
+                            "cover_url": cover_url,
                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(f"Successfully updated Luma event {event_name}")
-                        continue
+                    )
+                    continue

                # Document doesn't exist by unique_identifier_hash
                # Check if a document with the same content_hash exists (from another connector)
@ -400,62 +353,10 @@ async def index_luma_events(
                    documents_skipped += 1
                    continue

-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "event_id": event_id,
-                        "event_name": event_name,
-                        "event_url": event_url,
-                        "start_at": start_at,
-                        "end_at": end_at,
-                        "timezone": timezone,
-                        "location": location or "No location",
-                        "city": city,
-                        "hosts": host_names,
-                        "document_type": "Luma Event",
-                        "connector_type": "Luma",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        event_markdown, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    summary_content = f"Luma Event: {event_name}\n\n"
-                    if event_url:
-                        summary_content += f"URL: {event_url}\n"
-                    summary_content += f"Start: {start_at}\n"
-                    summary_content += f"End: {end_at}\n"
-                    if timezone:
-                        summary_content += f"Timezone: {timezone}\n"
-                    if location:
-                        summary_content += f"Location: {location}\n"
-                    if city:
-                        summary_content += f"City: {city}\n"
-                    if host_names:
-                        summary_content += f"Hosts: {host_names}\n"
-                    if description:
-                        desc_preview = description[:1000]
-                        if len(description) > 1000:
-                            desc_preview += "..."
-                        summary_content += f"Description: {desc_preview}\n"
-
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                chunks = await create_document_chunks(event_markdown)
-
+                # Create new document with PENDING status (visible in UI immediately)
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Luma Event - {event_name}",
+                    title=event_name,
                    document_type=DocumentType.LUMA_CONNECTOR,
                    document_metadata={
                        "event_id": event_id,
@ -468,23 +369,151 @@ async def index_luma_events(
                        "city": city,
                        "hosts": host_names,
                        "cover_url": cover_url,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                    },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                    updated_at=get_current_timestamp(),
                    created_by_id=user_id,
                    connector_id=connector_id,
                )
-
                session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new event {event_name}")
+                new_documents_created = True

-                # Batch commit every 10 documents
+                events_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "event_id": event_id,
+                        "event_name": event_name,
+                        "event_url": event_url,
+                        "event_markdown": event_markdown,
+                        "content_hash": content_hash,
+                        "start_at": start_at,
+                        "end_at": end_at,
+                        "timezone": timezone,
+                        "location": location,
+                        "city": city,
+                        "host_names": host_names,
+                        "description": description,
+                        "cover_url": cover_url,
+                    }
+                )
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(events_to_process)} documents")
+
+        for item in events_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "event_id": item["event_id"],
+                        "event_name": item["event_name"],
+                        "event_url": item["event_url"],
+                        "start_at": item["start_at"],
+                        "end_at": item["end_at"],
+                        "timezone": item["timezone"],
+                        "location": item["location"] or "No location",
+                        "city": item["city"],
+                        "hosts": item["host_names"],
+                        "document_type": "Luma Event",
+                        "connector_type": "Luma",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["event_markdown"], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    summary_content = f"Luma Event: {item['event_name']}\n\n"
+                    if item["event_url"]:
+                        summary_content += f"URL: {item['event_url']}\n"
+                    summary_content += f"Start: {item['start_at']}\n"
+                    summary_content += f"End: {item['end_at']}\n"
+                    if item["timezone"]:
+                        summary_content += f"Timezone: {item['timezone']}\n"
+                    if item["location"]:
+                        summary_content += f"Location: {item['location']}\n"
+                    if item["city"]:
+                        summary_content += f"City: {item['city']}\n"
+                    if item["host_names"]:
+                        summary_content += f"Hosts: {item['host_names']}\n"
+                    if item["description"]:
+                        desc_preview = item["description"][:1000]
+                        if len(item["description"]) > 1000:
+                            desc_preview += "..."
+                        summary_content += f"Description: {desc_preview}\n"
+
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item["event_markdown"])
+
+                # Update document to READY with actual content
+                document.title = item["event_name"]
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "event_id": item["event_id"],
+                    "event_name": item["event_name"],
+                    "event_url": item["event_url"],
+                    "start_at": item["start_at"],
+                    "end_at": item["end_at"],
+                    "timezone": item["timezone"],
+                    "location": item["location"],
+                    "city": item["city"],
+                    "hosts": item["host_names"],
+                    "cover_url": item["cover_url"],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                if documents_indexed % 10 == 0:
                    logger.info(
                        f"Committing batch: {documents_indexed} Luma events processed so far"
@ -493,38 +522,71 @@ async def index_luma_events(

            except Exception as e:
                logger.error(
-                    f"Error processing event {event.get('name', 'Unknown')}: {e!s}",
+                    f"Error processing event {item.get('event_name', 'Unknown')}: {e!s}",
                    exc_info=True,
                )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                skipped_events.append(
-                    f"{event.get('name', 'Unknown')} (processing error)"
+                    f"{item.get('event_name', 'Unknown')} (processing error)"
                )
-                documents_skipped += 1
+                documents_failed += 1
                continue

-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(f"Final commit: Total {documents_indexed} Luma events processed")
-        await session.commit()
+        try:
+            await session.commit()
+            logger.info("Successfully committed all Luma document changes to database")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same event was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Luma indexing for connector {connector_id}",
            {
-                "events_processed": total_processed,
+                "events_processed": documents_indexed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                "skipped_events_count": len(skipped_events),
            },
        )

        logger.info(
-            f"Luma indexing completed: {documents_indexed} new events, {documents_skipped} skipped"
+            f"Luma indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed"
        )
-        return total_processed, None
+        return documents_indexed, warning_message

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
@ -1,5 +1,9 @@
 """
 Notion connector indexer.
+
+Implements real-time document status updates using a two-phase approach:
+- Phase 1: Create all documents with PENDING status (visible in UI immediately)
+- Phase 2: Process each document one by one (pending → processing → ready/failed)
 """

 import time
@ -9,8 +13,9 @@ from datetime import datetime
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

+from app.config import config
 from app.connectors.notion_history import NotionHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -28,6 +33,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -245,12 +251,17 @@ async def index_notion_pages(
                {"pages_found": 0},
            )
            logger.info("No Notion pages found to index")
+            # CRITICAL: Update timestamp even when no pages found so Electric SQL syncs
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
            await notion_client.close()
            return 0, None  # Success with 0 pages, not an error

        # Track the number of documents indexed
        documents_indexed = 0
        documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0
        skipped_pages = []

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
@ -262,22 +273,69 @@ async def index_notion_pages(
            {"stage": "process_pages", "total_pages": len(pages)},
        )

-        # Process each page
-        for page in pages:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
+        # =======================================================================
+        # PHASE 1: Analyze all pages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        pages_to_process = []  # List of dicts with document and page data
+        new_documents_created = False

+        # Helper: recursively render a list of Notion block dicts ("type" / "content" / "children") as one markdown string
+        def process_blocks(blocks, level=0):  # level = nesting depth, 0 for top-level blocks
+            result = ""
+            for block in blocks:
+                block_type = block.get("type")
+                block_content = block.get("content", "")
+                children = block.get("children", [])
+
+                # Indent by two spaces per nesting level
+                indent = "  " * level
+
+                # Emit markdown by block type; list-style items and toggles end with one newline, other blocks with a blank line
+                if block_type in ["paragraph", "text"]:
+                    result += f"{indent}{block_content}\n\n"
+                elif block_type in ["heading_1", "header"]:
+                    result += f"{indent}# {block_content}\n\n"
+                elif block_type == "heading_2":
+                    result += f"{indent}## {block_content}\n\n"
+                elif block_type == "heading_3":
+                    result += f"{indent}### {block_content}\n\n"
+                elif block_type == "bulleted_list_item":
+                    result += f"{indent}* {block_content}\n"
+                elif block_type == "numbered_list_item":
+                    result += f"{indent}1. {block_content}\n"  # every item is emitted with the literal marker "1."
+                elif block_type == "to_do":
+                    result += f"{indent}- [ ] {block_content}\n"
+                elif block_type == "toggle":
+                    result += f"{indent}> {block_content}\n"
+                elif block_type == "code":
+                    result += f"{indent}```\n{block_content}\n```\n\n"  # only the opening fence carries the indent
+                elif block_type == "quote":
+                    result += f"{indent}> {block_content}\n\n"
+                elif block_type == "callout":
+                    result += f"{indent}> **Note:** {block_content}\n\n"
+                elif block_type == "image":
+                    result += f"{indent}![Image]({block_content})\n\n"
+                else:
+                    # Unrecognized block types: emit their content as plain text, skipping empty content
+                    if block_content:
+                        result += f"{indent}{block_content}\n\n"
+
+                # Children are rendered one level deeper, whatever the parent's block type
+                if children:
+                    result += process_blocks(children, level + 1)
+
+            return result
+
+        for page in pages:
            try:
                page_id = page.get("page_id")
                page_title = page.get("title", f"Untitled page ({page_id})")
                page_content = page.get("content", [])

-                logger.info(f"Processing Notion page: {page_title} ({page_id})")
+                if not page_id:
+                    documents_skipped += 1
+                    continue

                if not page_content:
                    logger.info(f"No content found in page {page_title}. Skipping.")
@ -287,57 +345,6 @@ async def index_notion_pages(

                # Convert page content to markdown format
                markdown_content = f"# Notion Page: {page_title}\n\n"
-
-                # Process blocks recursively
-                def process_blocks(blocks, level=0):
-                    result = ""
-                    for block in blocks:
-                        block_type = block.get("type")
-                        block_content = block.get("content", "")
-                        children = block.get("children", [])
-
-                        # Add indentation based on level
-                        indent = "  " * level
-
-                        # Format based on block type
-                        if block_type in ["paragraph", "text"]:
-                            result += f"{indent}{block_content}\n\n"
-                        elif block_type in ["heading_1", "header"]:
-                            result += f"{indent}# {block_content}\n\n"
-                        elif block_type == "heading_2":
-                            result += f"{indent}## {block_content}\n\n"
-                        elif block_type == "heading_3":
-                            result += f"{indent}### {block_content}\n\n"
-                        elif block_type == "bulleted_list_item":
-                            result += f"{indent}* {block_content}\n"
-                        elif block_type == "numbered_list_item":
-                            result += f"{indent}1. {block_content}\n"
-                        elif block_type == "to_do":
-                            result += f"{indent}- [ ] {block_content}\n"
-                        elif block_type == "toggle":
-                            result += f"{indent}> {block_content}\n"
-                        elif block_type == "code":
-                            result += f"{indent}```\n{block_content}\n```\n\n"
-                        elif block_type == "quote":
-                            result += f"{indent}> {block_content}\n\n"
-                        elif block_type == "callout":
-                            result += f"{indent}> **Note:** {block_content}\n\n"
-                        elif block_type == "image":
-                            result += f"{indent}![Image]({block_content})\n\n"
-                        else:
-                            # Default for other block types
-                            if block_content:
-                                result += f"{indent}{block_content}\n\n"
-
-                        # Process children recursively
-                        if children:
-                            result += process_blocks(children, level + 1)
-
-                    return result
-
-                logger.debug(
-                    f"Converting {len(page_content)} blocks to markdown for page {page_title}"
-                )
                markdown_content += process_blocks(page_content)

                # Format document metadata
@ -377,71 +384,26 @@ async def index_notion_pages(
                if existing_document:
                    # Document exists - check if content has changed
                    if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Notion page {page_title} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
                        documents_skipped += 1
                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Notion page {page_title}. Updating document."
-                        )

-                        # Get user's long context LLM
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-                        if not user_llm:
-                            logger.error(
-                                f"No long context LLM configured for user {user_id}"
-                            )
-                            skipped_pages.append(f"{page_title} (no LLM configured)")
-                            documents_skipped += 1
-                            continue
-
-                        # Generate summary with metadata
-                        document_metadata = {
-                            "page_title": page_title,
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    pages_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "markdown_content": markdown_content,
+                            "content_hash": content_hash,
                            "page_id": page_id,
-                            "document_type": "Notion Page",
-                            "connector_type": "Notion",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            markdown_content, user_llm, document_metadata
-                        )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(markdown_content)
-
-                        # Update existing document
-                        existing_document.title = f"Notion - {page_title}"
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
                            "page_title": page_title,
-                            "page_id": page_id,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-                        existing_document.connector_id = connector_id
-
-                        documents_indexed += 1
-                        logger.info(f"Successfully updated Notion page: {page_title}")
-
-                        # Batch commit every 10 documents
-                        if documents_indexed % 10 == 0:
-                            logger.info(
-                                f"Committing batch: {documents_indexed} documents processed so far"
-                            )
-                            await session.commit()
-
-                        continue
+                    )
+                    continue

                # Document doesn't exist by unique_identifier_hash
                # Check if a document with the same content_hash exists (from another connector)
@ -456,91 +418,182 @@ async def index_notion_pages(
                        f"(existing document ID: {duplicate_by_content.id}, "
                        f"type: {duplicate_by_content.document_type}). Skipping."
                    )
+                    duplicate_content_count += 1
                    documents_skipped += 1
                    continue

-                # Document doesn't exist - create new one
-                # Get user's long context LLM
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-                if not user_llm:
-                    logger.error(f"No long context LLM configured for user {user_id}")
-                    skipped_pages.append(f"{page_title} (no LLM configured)")
-                    documents_skipped += 1
-                    continue
-
-                # Generate summary with metadata
-                logger.debug(f"Generating summary for page {page_title}")
-                document_metadata = {
-                    "page_title": page_title,
-                    "page_id": page_id,
-                    "document_type": "Notion Page",
-                    "connector_type": "Notion",
-                }
-                summary_content, summary_embedding = await generate_document_summary(
-                    markdown_content, user_llm, document_metadata
-                )
-
-                # Process chunks
-                logger.debug(f"Chunking content for page {page_title}")
-                chunks = await create_document_chunks(markdown_content)
-
-                # Create and store new document
+                # Create new document with PENDING status (visible in UI immediately)
                document = Document(
                    search_space_id=search_space_id,
-                    title=f"Notion - {page_title}",
+                    title=page_title,
                    document_type=DocumentType.NOTION_CONNECTOR,
                    document_metadata={
                        "page_title": page_title,
                        "page_id": page_id,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                    },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                    updated_at=get_current_timestamp(),
                    created_by_id=user_id,
                    connector_id=connector_id,
                )
-
                session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new Notion page: {page_title}")
+                new_documents_created = True

-                # Batch commit every 10 documents
+                pages_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "page_id": page_id,
+                        "page_title": page_title,
+                    }
+                )
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(pages_to_process)} documents")
+
+        for item in pages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "page_title": item["page_title"],
+                        "page_id": item["page_id"],
+                        "document_type": "Notion Page",
+                        "connector_type": "Notion",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["markdown_content"],
+                        user_llm,
+                        document_metadata_for_summary,
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    summary_content = f"Notion Page: {item['page_title']}\n\n{item['markdown_content'][:500]}..."
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item["markdown_content"])
+
+                # Update document to READY with actual content
+                document.title = item["page_title"]
+                document.content = summary_content
+                document.content_hash = item["content_hash"]
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "page_title": item["page_title"],
+                    "page_id": item["page_id"],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                if documents_indexed % 10 == 0:
                    logger.info(
-                        f"Committing batch: {documents_indexed} documents processed so far"
+                        f"Committing batch: {documents_indexed} Notion pages processed so far"
                    )
                    await session.commit()

            except Exception as e:
-                logger.error(
-                    f"Error processing Notion page {page.get('title', 'Unknown')}: {e!s}",
-                    exc_info=True,
-                )
-                skipped_pages.append(
-                    f"{page.get('title', 'Unknown')} (processing error)"
-                )
-                documents_skipped += 1
-                continue  # Skip this page and continue with others
+                logger.error(f"Error processing Notion page: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                skipped_pages.append(f"{item['page_title']} (processing error)")
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)

-        # Update the last_indexed_at timestamp for the connector only if requested
-        # and if we successfully indexed at least one page
        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)

-        # Final commit for any remaining documents not yet committed in batches
+        # Final commit to ensure all documents are persisted (safety net)
        logger.info(f"Final commit: Total {documents_indexed} documents processed")
-        await session.commit()
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Notion document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same page was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise

        # Get final count of pages with skipped Notion AI content
        pages_with_skipped_ai_content = notion_client.get_skipped_content_count()

+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
+
        # Prepare result message with user-friendly notification about skipped content
        result_message = None
        if skipped_pages:
@ -563,6 +616,8 @@ async def index_notion_pages(
                "pages_processed": total_processed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
                "skipped_pages_count": len(skipped_pages),
                "pages_with_skipped_ai_content": pages_with_skipped_ai_content,
                "result_message": result_message,
@ -570,7 +625,9 @@ async def index_notion_pages(
        )

        logger.info(
-            f"Notion indexing completed: {documents_indexed} new pages, {documents_skipped} skipped"
+            f"Notion indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
        )

        # Clean up the async client
@ -590,6 +647,10 @@ async def index_notion_pages(
                "Using legacy token. Reconnect with OAuth for better reliability."
            )

+        # Include warning message if there were issues
+        if warning_message:
+            notification_parts.append(warning_message)
+
        user_notification_message = (
            " ".join(notification_parts) if notification_parts else None
        )
--- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
@ -3,6 +3,10 @@ Obsidian connector indexer.

 Indexes markdown notes from a local Obsidian vault.
 This connector is only available in self-hosted mode.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import os
@ -17,7 +21,7 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -34,6 +38,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -307,25 +312,22 @@ async def index_obsidian_vault(

        logger.info(f"Processing {len(files)} files after date filtering")

-        # Get LLM for summarization
-        long_context_llm = await get_user_long_context_llm(
-            session, user_id, search_space_id
-        )
-
        indexed_count = 0
        skipped_count = 0
+        failed_count = 0
+        duplicate_content_count = 0

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

+        # =======================================================================
+        # PHASE 1: Analyze all files, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        files_to_process = []  # List of dicts with document and file data
+        new_documents_created = False
+
        for file_info in files:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(indexed_count)
-                last_heartbeat_time = time.time()
            try:
                file_path = file_info["path"]
                relative_path = file_info["relative_path"]
@ -368,13 +370,151 @@ async def index_obsidian_vault(
                    search_space_id,
                )

+                # Generate content hash
+                content_hash = generate_content_hash(content, search_space_id)
+
                # Check for existing document
                existing_document = await check_document_by_unique_identifier(
                    session, unique_identifier_hash
                )

-                # Generate content hash
-                content_hash = generate_content_hash(content, search_space_id)
+                if existing_document:
+                    # Document exists - check if content has changed
+                    if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
+                            existing_document.status = DocumentStatus.ready()
+                        logger.debug(f"Note {title} unchanged, skipping")
+                        skipped_count += 1
+                        continue
+
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    files_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "file_info": file_info,
+                            "content": content,
+                            "body_content": body_content,
+                            "frontmatter": frontmatter,
+                            "wiki_links": wiki_links,
+                            "tags": tags,
+                            "title": title,
+                            "relative_path": relative_path,
+                            "content_hash": content_hash,
+                            "unique_identifier_hash": unique_identifier_hash,
+                        }
+                    )
+                    continue
+
+                # Document doesn't exist by unique_identifier_hash
+                # Check if a document with the same content_hash exists (from another connector)
+                with session.no_autoflush:
+                    duplicate_by_content = await check_duplicate_document_by_hash(
+                        session, content_hash
+                    )
+
+                if duplicate_by_content:
+                    logger.info(
+                        f"Obsidian note {title} already indexed by another connector "
+                        f"(existing document ID: {duplicate_by_content.id}, "
+                        f"type: {duplicate_by_content.document_type}). Skipping."
+                    )
+                    duplicate_content_count += 1
+                    skipped_count += 1
+                    continue
+
+                # Create new document with PENDING status (visible in UI immediately)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=title,
+                    document_type=DocumentType.OBSIDIAN_CONNECTOR,
+                    document_metadata={
+                        "vault_name": vault_name,
+                        "file_path": relative_path,
+                        "connector_id": connector_id,
+                    },
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
+                    updated_at=get_current_timestamp(),
+                    created_by_id=user_id,
+                    connector_id=connector_id,
+                )
+                session.add(document)
+                new_documents_created = True
+
+                files_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "file_info": file_info,
+                        "content": content,
+                        "body_content": body_content,
+                        "frontmatter": frontmatter,
+                        "wiki_links": wiki_links,
+                        "tags": tags,
+                        "title": title,
+                        "relative_path": relative_path,
+                        "content_hash": content_hash,
+                        "unique_identifier_hash": unique_identifier_hash,
+                    }
+                )
+
+            except Exception as e:
+                logger.exception(
+                    f"Error in Phase 1 for file {file_info.get('path', 'unknown')}: {e}"
+                )
+                failed_count += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(files_to_process)} documents")
+
+        # Get LLM for summarization
+        long_context_llm = await get_user_long_context_llm(
+            session, user_id, search_space_id
+        )
+
+        for item in files_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(indexed_count)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Extract data from item
+                title = item["title"]
+                relative_path = item["relative_path"]
+                content = item["content"]
+                body_content = item["body_content"]
+                frontmatter = item["frontmatter"]
+                wiki_links = item["wiki_links"]
+                tags = item["tags"]
+                content_hash = item["content_hash"]
+                file_info = item["file_info"]

                # Build metadata
                document_metadata = {
@ -404,134 +544,114 @@ async def index_obsidian_vault(
                ]
                document_string = build_document_metadata_string(metadata_sections)

-                if existing_document:
-                    # Check if content has changed
-                    if existing_document.content_hash == content_hash:
-                        logger.debug(f"Note {title} unchanged, skipping")
-                        skipped_count += 1
-                        continue
-
-                    # Update existing document
-                    logger.info(f"Updating note: {title}")
-
-                    # Generate new summary if content changed
-                    if long_context_llm:
-                        new_summary, _ = await generate_document_summary(
-                            document_string,
-                            long_context_llm,
-                            document_metadata,
-                        )
-                        # Store summary in metadata
-                        document_metadata["summary"] = new_summary
-
-                    # Add URL and connector_id to metadata
-                    document_metadata["url"] = (
-                        f"obsidian://{vault_name}/{relative_path}"
-                    )
-                    document_metadata["connector_id"] = connector_id
-
-                    existing_document.content = document_string
-                    existing_document.content_hash = content_hash
-                    existing_document.document_metadata = document_metadata
-                    existing_document.updated_at = get_current_timestamp()
-
-                    # Update embedding
-                    embedding = config.embedding_model_instance.embed(document_string)
-                    existing_document.embedding = embedding
-
-                    # Update chunks - delete old and create new
-                    existing_document.chunks.clear()
-                    new_chunks = await create_document_chunks(document_string)
-                    existing_document.chunks = new_chunks
-
-                    indexed_count += 1
-
-                else:
-                    # Document doesn't exist by unique_identifier_hash
-                    # Check if a document with the same content_hash exists (from another connector)
-                    with session.no_autoflush:
-                        duplicate_by_content = await check_duplicate_document_by_hash(
-                            session, content_hash
-                        )
-
-                    if duplicate_by_content:
-                        logger.info(
-                            f"Obsidian note {title} already indexed by another connector "
-                            f"(existing document ID: {duplicate_by_content.id}, "
-                            f"type: {duplicate_by_content.document_type}). Skipping."
-                        )
-                        skipped_count += 1
-                        continue
-
-                    # Create new document
-                    logger.info(f"Indexing new note: {title}")
-
-                    # Generate summary
-                    summary_content = ""
-                    if long_context_llm:
-                        summary_content, _ = await generate_document_summary(
-                            document_string,
-                            long_context_llm,
-                            document_metadata,
-                        )
-
-                    # Generate embedding
-                    embedding = config.embedding_model_instance.embed(document_string)
-
-                    # Add URL and summary to metadata
-                    document_metadata["url"] = (
-                        f"obsidian://{vault_name}/{relative_path}"
-                    )
-                    document_metadata["summary"] = summary_content
-                    document_metadata["connector_id"] = connector_id
-
-                    # Create chunks
-                    chunks = await create_document_chunks(document_string)
-
-                    # Create document
-                    new_document = Document(
-                        search_space_id=search_space_id,
-                        title=title,
-                        document_type=DocumentType.OBSIDIAN_CONNECTOR,
-                        content=document_string,
-                        content_hash=content_hash,
-                        unique_identifier_hash=unique_identifier_hash,
-                        document_metadata=document_metadata,
-                        embedding=embedding,
-                        chunks=chunks,
-                        updated_at=get_current_timestamp(),
-                        created_by_id=user_id,
-                        connector_id=connector_id,
+                # Generate summary
+                summary_content = ""
+                if long_context_llm:
+                    summary_content, _ = await generate_document_summary(
+                        document_string,
+                        long_context_llm,
+                        document_metadata,
                    )

-                    session.add(new_document)
+                # Generate embedding
+                embedding = config.embedding_model_instance.embed(document_string)

-                    indexed_count += 1
+                # Add URL, summary, and connector ID to metadata
+                document_metadata["url"] = f"obsidian://{vault_name}/{relative_path}"
+                document_metadata["summary"] = summary_content
+                document_metadata["connector_id"] = connector_id
+
+                # Create chunks
+                chunks = await create_document_chunks(document_string)
+
+                # Update document to READY with actual content
+                document.title = title
+                document.content = document_string
+                document.content_hash = content_hash
+                document.embedding = embedding
+                document.document_metadata = document_metadata
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                indexed_count += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if indexed_count % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {indexed_count} Obsidian notes processed so far"
+                    )
+                    await session.commit()

            except Exception as e:
                logger.exception(
-                    f"Error processing file {file_info.get('path', 'unknown')}: {e}"
+                    f"Error processing file {item.get('file_info', {}).get('path', 'unknown')}: {e}"
                )
-                skipped_count += 1
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                failed_count += 1
                continue

-        # Update connector's last indexed timestamp
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
        await update_connector_last_indexed(session, connector, update_last_indexed)

-        # Commit all changes
-        await session.commit()
+        # Final commit for any remaining documents not yet committed in batches
+        logger.info(f"Final commit: Total {indexed_count} Obsidian notes processed")
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Obsidian document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same note was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if failed_count > 0:
+            warning_parts.append(f"{failed_count} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
+
+        total_processed = indexed_count

        await task_logger.log_task_success(
            log_entry,
-            f"Successfully indexed {indexed_count} Obsidian notes (skipped {skipped_count})",
+            f"Successfully completed Obsidian vault indexing for connector {connector_id}",
            {
-                "indexed_count": indexed_count,
-                "skipped_count": skipped_count,
-                "total_files": len(files),
+                "notes_processed": total_processed,
+                "documents_indexed": indexed_count,
+                "documents_skipped": skipped_count,
+                "documents_failed": failed_count,
+                "duplicate_content_count": duplicate_content_count,
            },
        )

-        return indexed_count, None
+        logger.info(
+            f"Obsidian vault indexing completed: {indexed_count} ready, "
+            f"{skipped_count} skipped, {failed_count} failed "
+            f"({duplicate_content_count} duplicate content)"
+        )
+        return total_processed, warning_message

    except SQLAlchemyError as e:
        logger.exception(f"Database error during Obsidian indexing: {e}")
--- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
@ -1,5 +1,9 @@
 """
 Slack connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import time
@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.slack_history import SlackHistory
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
@ -28,6 +32,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -168,11 +173,15 @@ async def index_slack_messages(
                f"No Slack channels found for connector {connector_id}",
                {"channels_found": 0},
            )
-            return 0, "No Slack channels found"
+            # CRITICAL: Update timestamp even when no channels found so Electric SQL syncs
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
+            return 0, None  # Return None (not error) when no channels found

        # Track the number of documents indexed
        documents_indexed = 0
        documents_skipped = 0
+        documents_failed = 0  # Track messages that failed processing
        skipped_channels = []

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
@ -184,15 +193,14 @@ async def index_slack_messages(
            {"stage": "process_channels", "total_channels": len(channels)},
        )

-        # Process each channel
+        # =======================================================================
+        # PHASE 1: Collect all messages from all channels, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        messages_to_process = []  # List of dicts with document and message data
+        new_documents_created = False
+
        for channel_obj in channels:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
            channel_id = channel_obj["id"]
            channel_name = channel_obj["name"]
            is_private = channel_obj["is_private"]
@ -305,47 +313,33 @@ async def index_slack_messages(
                    if existing_document:
                        # Document exists - check if content has changed
                        if existing_document.content_hash == content_hash:
+                            # Ensure status is ready (might have been stuck in processing/pending)
+                            if not DocumentStatus.is_state(
+                                existing_document.status, DocumentStatus.READY
+                            ):
+                                existing_document.status = DocumentStatus.ready()
                            logger.info(
                                f"Document for Slack message {msg_ts} in channel {channel_name} unchanged. Skipping."
                            )
                            documents_skipped += 1
                            continue
-                        else:
-                            # Content has changed - update the existing document
-                            logger.info(
-                                f"Content changed for Slack message {msg_ts} in channel {channel_name}. Updating document."
-                            )

-                            # Update chunks and embedding
-                            chunks = await create_document_chunks(
-                                combined_document_string
-                            )
-                            doc_embedding = config.embedding_model_instance.embed(
-                                combined_document_string
-                            )
-
-                            # Update existing document
-                            existing_document.content = combined_document_string
-                            existing_document.content_hash = content_hash
-                            existing_document.embedding = doc_embedding
-                            existing_document.document_metadata = {
+                        # Queue existing document for update (will be set to processing in Phase 2)
+                        messages_to_process.append(
+                            {
+                                "document": existing_document,
+                                "is_new": False,
+                                "combined_document_string": combined_document_string,
+                                "content_hash": content_hash,
                                "channel_name": channel_name,
                                "channel_id": channel_id,
+                                "msg_ts": msg_ts,
                                "start_date": start_date_str,
                                "end_date": end_date_str,
                                "message_count": len(formatted_messages),
-                                "indexed_at": datetime.now().strftime(
-                                    "%Y-%m-%d %H:%M:%S"
-                                ),
                            }
-
-                            # Delete old chunks and add new ones
-                            existing_document.chunks = chunks
-                            existing_document.updated_at = get_current_timestamp()
-
-                            documents_indexed += 1
-                            logger.info(f"Successfully updated Slack message {msg_ts}")
-                            continue
+                        )
+                        continue

                    # Document doesn't exist by unique_identifier_hash
                    # Check if a document with the same content_hash exists (from another connector)
@ -363,48 +357,47 @@ async def index_slack_messages(
                        documents_skipped += 1
                        continue

-                    # Document doesn't exist - create new one
-                    # Process chunks
-                    chunks = await create_document_chunks(combined_document_string)
-                    doc_embedding = config.embedding_model_instance.embed(
-                        combined_document_string
-                    )
-
-                    # Create and store new document
+                    # Create new document with PENDING status (visible in UI immediately)
                    document = Document(
                        search_space_id=search_space_id,
-                        title=f"Slack - {channel_name}",
+                        title=channel_name,
                        document_type=DocumentType.SLACK_CONNECTOR,
                        document_metadata={
                            "channel_name": channel_name,
                            "channel_id": channel_id,
-                            "start_date": start_date_str,
-                            "end_date": end_date_str,
-                            "message_count": len(formatted_messages),
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                            "msg_ts": msg_ts,
+                            "connector_id": connector_id,
                        },
-                        content=combined_document_string,
-                        embedding=doc_embedding,
-                        chunks=chunks,
-                        content_hash=content_hash,
+                        content="Pending...",  # Placeholder until processed
+                        content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                        unique_identifier_hash=unique_identifier_hash,
+                        embedding=None,
+                        chunks=[],  # Empty at creation - safe for async
+                        status=DocumentStatus.pending(),  # Pending until processing starts
                        updated_at=get_current_timestamp(),
                        created_by_id=user_id,
                        connector_id=connector_id,
                    )
-
                    session.add(document)
-                    documents_indexed += 1
+                    new_documents_created = True

-                    # Batch commit every 10 documents
-                    if documents_indexed % 10 == 0:
-                        logger.info(
-                            f"Committing batch: {documents_indexed} Slack channels processed so far"
-                        )
-                        await session.commit()
+                    messages_to_process.append(
+                        {
+                            "document": document,
+                            "is_new": True,
+                            "combined_document_string": combined_document_string,
+                            "content_hash": content_hash,
+                            "channel_name": channel_name,
+                            "channel_id": channel_id,
+                            "msg_ts": msg_ts,
+                            "start_date": start_date_str,
+                            "end_date": end_date_str,
+                            "message_count": len(formatted_messages),
+                        }
+                    )

                logger.info(
-                    f"Successfully indexed new channel {channel_name} with {len(formatted_messages)} messages"
+                    f"Phase 1: Collected {len(formatted_messages)} messages from channel {channel_name}"
                )

            except SlackApiError as slack_error:
@ -420,43 +413,129 @@ async def index_slack_messages(
                documents_skipped += 1
                continue  # Skip this channel and continue with others

-        # Update the last_indexed_at timestamp for the connector only if requested
-        # and if we successfully indexed at least one channel
-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+
+        for item in messages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (embeddings, chunks)
+                chunks = await create_document_chunks(item["combined_document_string"])
+                doc_embedding = config.embedding_model_instance.embed(
+                    item["combined_document_string"]
+                )
+
+                # Update document to READY with actual content
+                document.title = item["channel_name"]
+                document.content = item["combined_document_string"]
+                document.content_hash = item["content_hash"]
+                document.embedding = doc_embedding
+                document.document_metadata = {
+                    "channel_name": item["channel_name"],
+                    "channel_id": item["channel_id"],
+                    "start_date": item["start_date"],
+                    "end_date": item["end_date"],
+                    "message_count": item["message_count"],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} Slack messages processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                logger.error(
+                    f"Error processing Slack message {item.get('msg_ts', 'Unknown')}: {e!s}",
+                    exc_info=True,
+                )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
-        logger.info(f"Final commit: Total {documents_indexed} Slack channels processed")
-        await session.commit()
+        logger.info(f"Final commit: Total {documents_indexed} Slack messages processed")
+        try:
+            await session.commit()
+            logger.info("Successfully committed all Slack document changes to database")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same message was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise

-        # Prepare result message
-        result_message = None
-        if skipped_channels:
-            result_message = f"Processed {total_processed} channels. Skipped {len(skipped_channels)} channels: {', '.join(skipped_channels)}"
-        else:
-            result_message = f"Processed {total_processed} channels."
+        # Build warning message if there were issues
+        warning_parts = []
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        # Log success
        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Slack indexing for connector {connector_id}",
            {
-                "channels_processed": total_processed,
+                "channels_processed": len(channels),
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                "skipped_channels_count": len(skipped_channels),
-                "result_message": result_message,
            },
        )

        logger.info(
-            f"Slack indexing completed: {documents_indexed} new channels, {documents_skipped} skipped"
+            f"Slack indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed"
        )
-        return (
-            total_processed,
-            None,
-        )  # Return None on success (result_message is for logging only)
+        return documents_indexed, warning_message

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
@ -1,17 +1,21 @@
 """
 Microsoft Teams connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import time
 from collections.abc import Awaitable, Callable
-from datetime import UTC
+from datetime import UTC, datetime

 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.teams_history import TeamsHistory
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
    create_document_chunks,
@ -27,6 +31,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -50,6 +55,10 @@ async def index_teams_messages(
    """
    Index Microsoft Teams messages from all accessible teams and channels.

+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+    - Phase 2: Process each document: pending → processing → ready/failed
+
    Args:
        session: Database session
        connector_id: ID of the Teams connector
@ -165,11 +174,16 @@ async def index_teams_messages(
                f"No Teams found for connector {connector_id}",
                {"teams_found": 0},
            )
-            return 0, "No Teams found"
+            # CRITICAL: Update timestamp even when no teams found so Electric SQL syncs
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
+            return 0, None  # Return None (not error) when no items found

        # Track the number of documents indexed
        documents_indexed = 0
        documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0
        skipped_channels = []

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
@ -182,8 +196,6 @@ async def index_teams_messages(
        )

        # Convert date strings to datetime objects for filtering
-        from datetime import datetime
-
        start_datetime = None
        end_datetime = None
        if start_date_str:
@ -197,16 +209,14 @@ async def index_teams_messages(
                hour=23, minute=59, second=59, tzinfo=UTC
            )

-        # Process each team
-        for team in teams:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
+        # =======================================================================
+        # PHASE 1: Collect all messages and create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        messages_to_process = []  # List of dicts with document and message data
+        new_documents_created = False

+        for team in teams:
            team_id = team.get("id")
            team_name = team.get("displayName", "Unknown Team")

@ -239,7 +249,6 @@ async def index_teams_messages(
                                channel_name,
                                team_name,
                            )
-                            documents_skipped += 1
                            continue

                        # Process each message
@ -322,60 +331,33 @@ async def index_teams_messages(
                            if existing_document:
                                # Document exists - check if content has changed
                                if existing_document.content_hash == content_hash:
-                                    logger.info(
-                                        "Document for Teams message %s in channel %s unchanged. Skipping.",
-                                        message_id,
-                                        channel_name,
-                                    )
+                                    # Ensure status is ready (might have been stuck in processing/pending)
+                                    if not DocumentStatus.is_state(
+                                        existing_document.status, DocumentStatus.READY
+                                    ):
+                                        existing_document.status = (
+                                            DocumentStatus.ready()
+                                        )
                                    documents_skipped += 1
                                    continue
-                                else:
-                                    # Content has changed - update the existing document
-                                    logger.info(
-                                        "Content changed for Teams message %s in channel %s. Updating document.",
-                                        message_id,
-                                        channel_name,
-                                    )

-                                    # Update chunks and embedding
-                                    chunks = await create_document_chunks(
-                                        combined_document_string
-                                    )
-                                    doc_embedding = (
-                                        config.embedding_model_instance.embed(
-                                            combined_document_string
-                                        )
-                                    )
-
-                                    # Update existing document
-                                    existing_document.content = combined_document_string
-                                    existing_document.content_hash = content_hash
-                                    existing_document.embedding = doc_embedding
-                                    existing_document.document_metadata = {
+                                # Queue existing document for update (will be set to processing in Phase 2)
+                                messages_to_process.append(
+                                    {
+                                        "document": existing_document,
+                                        "is_new": False,
+                                        "combined_document_string": combined_document_string,
+                                        "content_hash": content_hash,
                                        "team_name": team_name,
                                        "team_id": team_id,
                                        "channel_name": channel_name,
                                        "channel_id": channel_id,
+                                        "message_id": message_id,
                                        "start_date": start_date_str,
                                        "end_date": end_date_str,
-                                        "message_count": len(messages),
-                                        "indexed_at": datetime.now().strftime(
-                                            "%Y-%m-%d %H:%M:%S"
-                                        ),
                                    }
-
-                                    # Delete old chunks and add new ones
-                                    existing_document.chunks = chunks
-                                    existing_document.updated_at = (
-                                        get_current_timestamp()
-                                    )
-
-                                    documents_indexed += 1
-                                    logger.info(
-                                        "Successfully updated Teams message %s",
-                                        message_id,
-                                    )
-                                    continue
+                                )
+                                continue

                            # Document doesn't exist by unique_identifier_hash
                            # Check if a document with the same content_hash exists (from another connector)
@ -395,62 +377,50 @@ async def index_teams_messages(
                                    duplicate_by_content.id,
                                    duplicate_by_content.document_type,
                                )
+                                duplicate_content_count += 1
                                documents_skipped += 1
                                continue

-                            # Document doesn't exist - create new one
-                            # Process chunks
-                            chunks = await create_document_chunks(
-                                combined_document_string
-                            )
-                            doc_embedding = config.embedding_model_instance.embed(
-                                combined_document_string
-                            )
-
-                            # Create and store new document
+                            # Create new document with PENDING status (visible in UI immediately)
                            document = Document(
                                search_space_id=search_space_id,
-                                title=f"Teams - {team_name} - {channel_name}",
+                                title=f"{team_name} - {channel_name}",
                                document_type=DocumentType.TEAMS_CONNECTOR,
                                document_metadata={
                                    "team_name": team_name,
                                    "team_id": team_id,
                                    "channel_name": channel_name,
                                    "channel_id": channel_id,
-                                    "start_date": start_date_str,
-                                    "end_date": end_date_str,
-                                    "message_count": len(messages),
-                                    "indexed_at": datetime.now().strftime(
-                                        "%Y-%m-%d %H:%M:%S"
-                                    ),
+                                    "connector_id": connector_id,
                                },
-                                content=combined_document_string,
-                                embedding=doc_embedding,
-                                chunks=chunks,
-                                content_hash=content_hash,
+                                content="Pending...",  # Placeholder until processed
+                                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                                unique_identifier_hash=unique_identifier_hash,
+                                embedding=None,
+                                chunks=[],  # Empty at creation - safe for async
+                                status=DocumentStatus.pending(),  # Pending until processing starts
                                updated_at=get_current_timestamp(),
                                created_by_id=user_id,
                                connector_id=connector_id,
                            )
-
                            session.add(document)
-                            documents_indexed += 1
+                            new_documents_created = True

-                            # Batch commit every 10 documents
-                            if documents_indexed % 10 == 0:
-                                logger.info(
-                                    "Committing batch: %s Teams messages processed so far",
-                                    documents_indexed,
-                                )
-                                await session.commit()
-
-                        logger.info(
-                            "Successfully indexed channel %s in team %s with %s messages",
-                            channel_name,
-                            team_name,
-                            len(messages),
-                        )
+                            messages_to_process.append(
+                                {
+                                    "document": document,
+                                    "is_new": True,
+                                    "combined_document_string": combined_document_string,
+                                    "content_hash": content_hash,
+                                    "team_name": team_name,
+                                    "team_id": team_id,
+                                    "channel_name": channel_name,
+                                    "channel_id": channel_id,
+                                    "message_id": message_id,
+                                    "start_date": start_date_str,
+                                    "end_date": end_date_str,
+                                }
+                            )

                    except Exception as e:
                        logger.error(
@ -462,54 +432,143 @@ async def index_teams_messages(
                        skipped_channels.append(
                            f"{team_name}/{channel_name} (processing error)"
                        )
-                        documents_skipped += 1
                        continue

            except Exception as e:
                logger.error("Error processing team %s: %s", team_name, str(e))
                continue

-        # Update the last_indexed_at timestamp for the connector only if requested
-        # and if we successfully indexed at least one document
-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+
+        for item in messages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (embeddings, chunks)
+                chunks = await create_document_chunks(item["combined_document_string"])
+                doc_embedding = config.embedding_model_instance.embed(
+                    item["combined_document_string"]
+                )
+
+                # Update document to READY with actual content
+                document.title = f"{item['team_name']} - {item['channel_name']}"
+                document.content = item["combined_document_string"]
+                document.content_hash = item["content_hash"]
+                document.embedding = doc_embedding
+                document.document_metadata = {
+                    "team_name": item["team_name"],
+                    "team_id": item["team_id"],
+                    "channel_name": item["channel_name"],
+                    "channel_id": item["channel_id"],
+                    "start_date": item["start_date"],
+                    "end_date": item["end_date"],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        "Committing batch: %s Teams messages processed so far",
+                        documents_indexed,
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                logger.error(f"Error processing Teams message: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(
            "Final commit: Total %s Teams messages processed", documents_indexed
        )
-        await session.commit()
+        try:
+            await session.commit()
+            logger.info("Successfully committed all Teams document changes to database")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise

-        # Prepare result message
-        result_message = None
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
        if skipped_channels:
-            result_message = f"Processed {total_processed} messages. Skipped {len(skipped_channels)} channels: {', '.join(skipped_channels)}"
-        else:
-            result_message = f"Processed {total_processed} messages."
+            warning_parts.append(f"{len(skipped_channels)} channels skipped")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        # Log success
        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Teams indexing for connector {connector_id}",
            {
-                "messages_processed": total_processed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
                "skipped_channels_count": len(skipped_channels),
-                "result_message": result_message,
            },
        )

        logger.info(
-            "Teams indexing completed: %s new messages, %s skipped",
+            "Teams indexing completed: %s ready, %s skipped, %s failed "
+            "(%s duplicate content)",
            documents_indexed,
            documents_skipped,
+            documents_failed,
+            duplicate_content_count,
        )
-        return (
-            total_processed,
-            None,
-        )  # Return None on success (result_message is for logging only)
+        return documents_indexed, warning_message

    except SQLAlchemyError as db_error:
        await session.rollback()
--- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
@ -1,5 +1,9 @@
 """
 Webcrawler connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """

 import time
@ -11,7 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config
 from app.connectors.webcrawler_connector import WebCrawlerConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -28,6 +32,7 @@ from .base import (
    get_connector_by_id,
    get_current_timestamp,
    logger,
+    safe_set_chunks,
    update_connector_last_indexed,
 )

@ -49,7 +54,11 @@ async def index_crawled_urls(
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, str | None]:
    """
-    Index web page URLs.
+    Index web page URLs with real-time document status updates.
+
+    Implements 2-phase approach for real-time UI feedback:
+    - Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+    - Phase 2: Process each document: pending → processing → ready/failed

    Args:
        session: Database session
@ -150,9 +159,9 @@ async def index_crawled_urls(

        await task_logger.log_task_progress(
            log_entry,
-            f"Starting to crawl {len(urls)} URLs",
+            f"Starting to process {len(urls)} URLs",
            {
-                "stage": "crawling",
+                "stage": "processing",
                "total_urls": len(urls),
            },
        )
@ -160,28 +169,128 @@ async def index_crawled_urls(
        documents_indexed = 0
        documents_updated = 0
        documents_skipped = 0
-        failed_urls = []
+        documents_failed = 0
+        duplicate_content_count = 0

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

-        for idx, url in enumerate(urls, 1):
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
+        # =======================================================================
+        # PHASE 1: Analyze all URLs, create pending documents for new ones
+        # This makes ALL new documents visible in the UI immediately with pending status
+        # =======================================================================
+        urls_to_process = []  # List of dicts with document and URL data
+        new_documents_created = False
+
+        for url in urls:
            try:
-                logger.info(f"Processing URL {idx}/{len(urls)}: {url}")
+                # Generate unique identifier hash for this URL
+                unique_identifier_hash = generate_unique_identifier_hash(
+                    DocumentType.CRAWLED_URL, url, search_space_id
+                )
+
+                # Check if document with this unique identifier already exists
+                existing_document = await check_document_by_unique_identifier(
+                    session, unique_identifier_hash
+                )
+
+                if existing_document:
+                    # Document exists - check if it's already being processed
+                    if DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.PENDING
+                    ):
+                        logger.info(f"URL {url} already pending. Skipping.")
+                        documents_skipped += 1
+                        continue
+                    if DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.PROCESSING
+                    ):
+                        logger.info(f"URL {url} already processing. Skipping.")
+                        documents_skipped += 1
+                        continue
+
+                    # Queue existing document for potential update check
+                    urls_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "url": url,
+                            "unique_identifier_hash": unique_identifier_hash,
+                        }
+                    )
+                    continue
+
+                # Create new document with PENDING status (visible in UI immediately)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=url[:100],  # Placeholder - URL as title (truncated)
+                    document_type=DocumentType.CRAWLED_URL,
+                    document_metadata={
+                        "url": url,
+                        "connector_id": connector_id,
+                    },
+                    content="Pending crawl...",  # Placeholder content
+                    content_hash=unique_identifier_hash,  # Temporary unique value
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # PENDING status - visible in UI
+                    updated_at=get_current_timestamp(),
+                    created_by_id=user_id,
+                    connector_id=connector_id,
+                )
+                session.add(document)
+                new_documents_created = True
+
+                urls_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "url": url,
+                        "unique_identifier_hash": unique_identifier_hash,
+                    }
+                )
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for URL {url}: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(
+                f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents"
+            )
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each URL one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(urls_to_process)} URLs")
+
+        for item in urls_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed + documents_updated)
+                    last_heartbeat_time = current_time
+
+            document = item["document"]
+            url = item["url"]
+            is_new = item["is_new"]
+
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()

                await task_logger.log_task_progress(
                    log_entry,
-                    f"Crawling URL {idx}/{len(urls)}: {url}",
+                    f"Crawling URL: {url}",
                    {
                        "stage": "crawling_url",
-                        "url_index": idx,
                        "url": url,
                    },
                )
@ -191,7 +300,10 @@ async def index_crawled_urls(

                if error or not crawl_result:
                    logger.warning(f"Failed to crawl URL {url}: {error}")
-                    failed_urls.append((url, error or "Unknown error"))
+                    document.status = DocumentStatus.failed(error or "Crawl failed")
+                    document.updated_at = get_current_timestamp()
+                    await session.commit()
+                    documents_failed += 1
                    continue

                # Extract content and metadata
@ -201,23 +313,18 @@ async def index_crawled_urls(

                if not content.strip():
                    logger.warning(f"Skipping URL with no content: {url}")
-                    failed_urls.append((url, "No content extracted"))
-                    documents_skipped += 1
+                    document.status = DocumentStatus.failed("No content extracted")
+                    document.updated_at = get_current_timestamp()
+                    await session.commit()
+                    documents_failed += 1
                    continue

-                # Format content as structured document for summary generation (includes all metadata)
+                # Format content as structured document for summary generation
                structured_document = crawler.format_to_structured_document(
                    crawl_result
                )

-                # Generate unique identifier hash for this URL
-                unique_identifier_hash = generate_unique_identifier_hash(
-                    DocumentType.CRAWLED_URL, url, search_space_id
-                )
-
                # Generate content hash using a version WITHOUT metadata
-                # This ensures the hash only changes when actual content changes,
-                # not when metadata (which contains dynamic fields like timestamps, IDs, etc.) changes
                structured_document_for_hash = crawler.format_to_structured_document(
                    crawl_result, exclude_metadata=True
                )
@ -225,114 +332,53 @@ async def index_crawled_urls(
                    structured_document_for_hash, search_space_id
                )

-                # Check if document with this unique identifier already exists
-                existing_document = await check_document_by_unique_identifier(
-                    session, unique_identifier_hash
-                )
-
                # Extract useful metadata
                title = metadata.get("title", url)
                description = metadata.get("description", "")
                language = metadata.get("language", "")

-                if existing_document:
-                    # Document exists - check if content has changed
-                    if existing_document.content_hash == content_hash:
-                        logger.info(f"Document for URL {url} unchanged. Skipping.")
-                        documents_skipped += 1
-                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for URL {url}. Updating document."
-                        )
+                # Update title immediately for better UX
+                document.title = title
+                await session.commit()

-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "url": url,
-                                "title": title,
-                                "description": description,
-                                "language": language,
-                                "document_type": "Crawled URL",
-                                "crawler_type": crawler_type,
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                structured_document, user_llm, document_metadata
-                            )
-                        else:
-                            # Fallback to simple summary if no LLM configured
-                            summary_content = f"Crawled URL: {title}\n\n"
-                            summary_content += f"URL: {url}\n"
-                            if description:
-                                summary_content += f"Description: {description}\n"
-                            if language:
-                                summary_content += f"Language: {language}\n"
-                            summary_content += f"Crawler: {crawler_type}\n\n"
-
-                            # Add content preview
-                            content_preview = content[:1000]
-                            if len(content) > 1000:
-                                content_preview += "..."
-                            summary_content += f"Content Preview:\n{content_preview}\n"
-
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(content)
-
-                        # Update existing document
-                        existing_document.title = title
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
-                            **metadata,
-                            "crawler_type": crawler_type,
-                            "last_crawled_at": datetime.now().strftime(
-                                "%Y-%m-%d %H:%M:%S"
-                            ),
-                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_updated += 1
-                        logger.info(f"Successfully updated URL {url}")
-                        continue
-
-                # Document doesn't exist by unique_identifier_hash
-                # Check if a document with the same content_hash exists (from another connector)
-                with session.no_autoflush:
-                    duplicate_by_content = await check_duplicate_document_by_hash(
-                        session, content_hash
-                    )
-
-                if duplicate_by_content:
-                    logger.info(
-                        f"URL {url} already indexed by another connector "
-                        f"(existing document ID: {duplicate_by_content.id}, "
-                        f"type: {duplicate_by_content.document_type}). Skipping."
-                    )
+                # For existing documents, check if content has changed
+                if not is_new and document.content_hash == content_hash:
+                    logger.info(f"Document for URL {url} unchanged. Marking as ready.")
+                    # Ensure status is ready (might have been stuck)
+                    document.status = DocumentStatus.ready()
+                    await session.commit()
                    documents_skipped += 1
                    continue

-                # Document doesn't exist - create new one
-                # Generate summary with metadata
+                # For new documents, check if duplicate content exists elsewhere
+                if is_new:
+                    with session.no_autoflush:
+                        duplicate_by_content = await check_duplicate_document_by_hash(
+                            session, content_hash
+                        )
+
+                    if duplicate_by_content:
+                        logger.info(
+                            f"URL {url} already indexed by another connector "
+                            f"(existing document ID: {duplicate_by_content.id}). "
+                            f"Marking as failed."
+                        )
+                        document.status = DocumentStatus.failed(
+                            "Duplicate content exists"
+                        )
+                        document.updated_at = get_current_timestamp()
+                        await session.commit()
+                        duplicate_content_count += 1
+                        documents_skipped += 1
+                        continue
+
+                # Generate summary with LLM
                user_llm = await get_user_long_context_llm(
                    session, user_id, search_space_id
                )

                if user_llm:
-                    document_metadata = {
+                    document_metadata_for_summary = {
                        "url": url,
                        "title": title,
                        "description": description,
@ -344,7 +390,7 @@ async def index_crawled_urls(
                        summary_content,
                        summary_embedding,
                    ) = await generate_document_summary(
-                        structured_document, user_llm, document_metadata
+                        structured_document, user_llm, document_metadata_for_summary
                    )
                else:
                    # Fallback to simple summary if no LLM configured
@ -366,32 +412,32 @@ async def index_crawled_urls(
                        summary_content
                    )

+                # Process chunks
                chunks = await create_document_chunks(content)

-                document = Document(
-                    search_space_id=search_space_id,
-                    title=title,
-                    document_type=DocumentType.CRAWLED_URL,
-                    document_metadata={
-                        **metadata,
-                        "crawler_type": crawler_type,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                    },
-                    content=summary_content,
-                    content_hash=content_hash,
-                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
-                    updated_at=get_current_timestamp(),
-                    created_by_id=user_id,
-                    connector_id=connector_id,
-                )
+                # Update document to READY with actual content
+                document.title = title
+                document.content = summary_content
+                document.content_hash = content_hash
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    **metadata,
+                    "crawler_type": crawler_type,
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.status = DocumentStatus.ready()  # READY status
+                document.updated_at = get_current_timestamp()

-                session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new URL {url}")
+                if is_new:
+                    documents_indexed += 1
+                else:
+                    documents_updated += 1

-                # Batch commit every 10 documents
+                logger.info(f"Successfully processed URL {url}")
+
+                # Batch commit every 10 documents (for ready status updates)
                if (documents_indexed + documents_updated) % 10 == 0:
                    logger.info(
                        f"Committing batch: {documents_indexed + documents_updated} URLs processed so far"
@ -399,32 +445,51 @@ async def index_crawled_urls(
                    await session.commit()

            except Exception as e:
-                logger.error(
-                    f"Error processing URL {url}: {e!s}",
-                    exc_info=True,
-                )
-                failed_urls.append((url, str(e)))
+                logger.error(f"Error processing URL {url}: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e)[:200])
+                    document.updated_at = get_current_timestamp()
+                    await session.commit()
+                except Exception as status_error:
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
+                documents_failed += 1
                continue

        total_processed = documents_indexed + documents_updated

-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(
            f"Final commit: Total {documents_indexed} new, {documents_updated} updated URLs processed"
        )
-        await session.commit()
-
-        # Log failed URLs if any (for debugging purposes)
-        if failed_urls:
-            failed_summary = "; ".join(
-                [f"{url}: {error}" for url, error in failed_urls[:5]]
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all webcrawler document changes to database"
            )
-            if len(failed_urls) > 5:
-                failed_summary += f" (and {len(failed_urls) - 5} more)"
-            logger.warning(f"Some URLs failed to index: {failed_summary}")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully
+            if "duplicate key value violates unique constraint" in str(e).lower():
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None

        await task_logger.log_task_success(
            log_entry,
@ -434,19 +499,21 @@ async def index_crawled_urls(
                "documents_indexed": documents_indexed,
                "documents_updated": documents_updated,
                "documents_skipped": documents_skipped,
-                "failed_urls_count": len(failed_urls),
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
            },
        )

        logger.info(
            f"Web page indexing completed: {documents_indexed} new, "
            f"{documents_updated} updated, {documents_skipped} skipped, "
-            f"{len(failed_urls)} failed"
+            f"{documents_failed} failed"
        )
-        return (
-            total_processed,
-            None,
-        )  # Return None on success (result_message is for logging only)
+
+        if warning_message:
+            return total_processed, f"Completed with issues: {warning_message}"
+
+        return total_processed, None

    except SQLAlchemyError as db_error:
        await session.rollback()
@ -494,9 +561,7 @@ async def get_crawled_url_documents(
    )

    if connector_id:
-        # Filter by connector if needed - you might need to add a connector_id field to Document
-        # or filter by some other means depending on your schema
-        pass
+        query = query.filter(Document.connector_id == connector_id)

    result = await session.execute(query)
    documents = result.scalars().all()
--- a/surfsense_backend/app/tasks/document_processors/base.py
+++ b/surfsense_backend/app/tasks/document_processors/base.py
@ -14,6 +14,35 @@ from app.db import Document
 md = MarkdownifyTransformer()


+def safe_set_chunks(document: Document, chunks: list) -> None:
+    """
+    Assign chunks to a document without triggering a lazy load.
+
+    Use this instead of `document.chunks = chunks` to avoid
+    SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
+
+    Why this is needed:
+    - Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
+      load the OLD chunks first (for comparison/orphan detection)
+    - This lazy loading fails in async context with asyncpg driver
+    - set_committed_value sets the value without emitting history events
+
+    Works with or without selectinload. NOTE(review): no history is recorded,
+    so confirm new chunks are persisted and old rows are removed on commit.
+
+    Args:
+        document: The Document object to update
+        chunks: List of Chunk objects to assign
+
+    Example:
+        # Instead of: document.chunks = chunks
+        safe_set_chunks(document, chunks)
+    """
+    from sqlalchemy.orm.attributes import set_committed_value
+
+    set_committed_value(document, "chunks", chunks)
+
+
 def get_current_timestamp() -> datetime:
    """
    Get the current timestamp with timezone for updated_at field.
--- a/surfsense_backend/app/tasks/document_processors/circleback_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/circleback_processor.py
@ -3,6 +3,11 @@ Circleback meeting document processor.

 This module processes meeting data received from Circleback webhooks
 and stores it as searchable documents in the database.
+
+Implements real-time document status updates for UI feedback:
+- Create document with 'pending' status (visible in UI immediately)
+- Set to 'processing' while processing content
+- Set to 'ready' or 'failed' when complete
 """

 import logging
@ -14,6 +19,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.db import (
    Document,
+    DocumentStatus,
    DocumentType,
    SearchSourceConnector,
    SearchSourceConnectorType,
@ -30,6 +36,7 @@ from app.utils.document_converters import (
 from .base import (
    check_document_by_unique_identifier,
    get_current_timestamp,
+    safe_set_chunks,
 )

 logger = logging.getLogger(__name__)
@ -47,6 +54,11 @@ async def add_circleback_meeting_document(
    """
    Process and store a Circleback meeting document.

+    Implements real-time document status updates:
+    - Phase 1: Create document with 'pending' status (visible in UI immediately)
+    - Phase 2: Set to 'processing' while processing content
+    - Phase 3: Set to 'ready' or 'failed' when complete
+
    Args:
        session: Database session
        meeting_id: Circleback meeting ID
@ -59,6 +71,7 @@ async def add_circleback_meeting_document(
    Returns:
        Document object if successful, None if failed or duplicate
    """
+    document = None
    try:
        # Generate unique identifier hash using Circleback meeting ID
        unique_identifier = f"circleback_{meeting_id}"
@ -77,6 +90,12 @@ async def add_circleback_meeting_document(
        if existing_document:
            # Document exists - check if content has changed
            if existing_document.content_hash == content_hash:
+                # Ensure status is ready (might have been stuck in processing/pending)
+                if not DocumentStatus.is_state(
+                    existing_document.status, DocumentStatus.READY
+                ):
+                    existing_document.status = DocumentStatus.ready()
+                    await session.commit()
                logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.")
                return existing_document
            else:
@ -84,6 +103,78 @@ async def add_circleback_meeting_document(
                logger.info(
                    f"Content changed for Circleback meeting {meeting_id}. Updating document."
                )
+                document = existing_document
+                # Set to PROCESSING status and commit - shows "processing" in UI
+                document.status = DocumentStatus.processing()
+                await session.commit()
+        else:
+            # =======================================================================
+            # PHASE 1: Create document with PENDING status
+            # This makes the document visible in the UI immediately
+            # =======================================================================
+
+            # Fetch the user who set up the Circleback connector (preferred)
+            # or fall back to search space owner if no connector found
+            created_by_user_id = None
+
+            # Try to find the Circleback connector for this search space
+            connector_result = await session.execute(
+                select(SearchSourceConnector.user_id).where(
+                    SearchSourceConnector.search_space_id == search_space_id,
+                    SearchSourceConnector.connector_type
+                    == SearchSourceConnectorType.CIRCLEBACK_CONNECTOR,
+                )
+            )
+            connector_user = connector_result.scalar_one_or_none()
+
+            if connector_user:
+                # Use the user who set up the Circleback connector
+                created_by_user_id = connector_user
+            else:
+                # Fallback: use search space owner if no connector found
+                search_space_result = await session.execute(
+                    select(SearchSpace.user_id).where(SearchSpace.id == search_space_id)
+                )
+                created_by_user_id = search_space_result.scalar_one_or_none()
+
+            # Create new document with PENDING status (visible in UI immediately)
+            document = Document(
+                search_space_id=search_space_id,
+                title=meeting_name,
+                document_type=DocumentType.CIRCLEBACK,
+                document_metadata={
+                    "CIRCLEBACK_MEETING_ID": meeting_id,
+                    "MEETING_NAME": meeting_name,
+                    "SOURCE": "CIRCLEBACK_WEBHOOK",
+                    "connector_id": connector_id,
+                },
+                content="Pending...",  # Placeholder until processed
+                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                unique_identifier_hash=unique_identifier_hash,
+                embedding=None,
+                chunks=[],  # Empty at creation - safe for async
+                status=DocumentStatus.pending(),  # Pending until processing starts
+                content_needs_reindexing=False,
+                updated_at=get_current_timestamp(),
+                created_by_id=created_by_user_id,
+                connector_id=connector_id,
+            )
+            session.add(document)
+            # Commit immediately so document appears in UI with pending status
+            await session.commit()
+            logger.info(
+                f"Created pending Circleback meeting document {meeting_id} in search space {search_space_id}"
+            )
+
+            # =======================================================================
+            # PHASE 2: Set to PROCESSING status
+            # =======================================================================
+            document.status = DocumentStatus.processing()
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 3: Process the document content
+        # =======================================================================

        # Get LLM for generating summary
        llm = await get_document_summary_llm(session, search_space_id)
@ -100,7 +191,7 @@ async def add_circleback_meeting_document(
            summary_embedding = None
        else:
            # Generate summary with metadata
-            document_metadata = {
+            summary_metadata = {
                "meeting_name": meeting_name,
                "meeting_id": meeting_id,
                "document_type": "Circleback Meeting",
@ -111,7 +202,7 @@ async def add_circleback_meeting_document(
                },
            }
            summary_content, summary_embedding = await generate_document_summary(
-                markdown_content, llm, document_metadata
+                markdown_content, llm, summary_metadata
            )

        # Process chunks
@ -126,7 +217,7 @@ async def add_circleback_meeting_document(
                f"Failed to convert Circleback meeting {meeting_id} to BlockNote JSON, document will not be editable"
            )

-        # Prepare document metadata
+        # Prepare final document metadata
        document_metadata = {
            "CIRCLEBACK_MEETING_ID": meeting_id,
            "MEETING_NAME": meeting_name,
@ -134,77 +225,34 @@ async def add_circleback_meeting_document(
            **metadata,
        }

-        # Fetch the user who set up the Circleback connector (preferred)
-        # or fall back to search space owner if no connector found
-        created_by_user_id = None
+        # =======================================================================
+        # PHASE 4: Update document to READY status with actual content
+        # =======================================================================
+        document.title = meeting_name
+        document.content = summary_content
+        document.content_hash = content_hash
+        if summary_embedding is not None:
+            document.embedding = summary_embedding
+        document.document_metadata = document_metadata
+        safe_set_chunks(document, chunks)
+        document.blocknote_document = blocknote_json
+        document.content_needs_reindexing = False
+        document.updated_at = get_current_timestamp()
+        document.status = DocumentStatus.ready()
+        # Ensure connector_id is set (backfill for documents created before this field)
+        if connector_id is not None:
+            document.connector_id = connector_id

-        # Try to find the Circleback connector for this search space
-        connector_result = await session.execute(
-            select(SearchSourceConnector.user_id).where(
-                SearchSourceConnector.search_space_id == search_space_id,
-                SearchSourceConnector.connector_type
-                == SearchSourceConnectorType.CIRCLEBACK_CONNECTOR,
-            )
-        )
-        connector_user = connector_result.scalar_one_or_none()
+        await session.commit()
+        await session.refresh(document)

-        if connector_user:
-            # Use the user who set up the Circleback connector
-            created_by_user_id = connector_user
-        else:
-            # Fallback: use search space owner if no connector found
-            search_space_result = await session.execute(
-                select(SearchSpace.user_id).where(SearchSpace.id == search_space_id)
-            )
-            created_by_user_id = search_space_result.scalar_one_or_none()
-
-        # Update or create document
        if existing_document:
-            # Update existing document
-            existing_document.title = meeting_name
-            existing_document.content = summary_content
-            existing_document.content_hash = content_hash
-            if summary_embedding is not None:
-                existing_document.embedding = summary_embedding
-            existing_document.document_metadata = document_metadata
-            existing_document.chunks = chunks
-            existing_document.blocknote_document = blocknote_json
-            existing_document.content_needs_reindexing = False
-            existing_document.updated_at = get_current_timestamp()
-            # Ensure connector_id is set (backfill for documents created before this field)
-            if connector_id is not None:
-                existing_document.connector_id = connector_id
-
-            await session.commit()
-            await session.refresh(existing_document)
-            document = existing_document
            logger.info(
                f"Updated Circleback meeting document {meeting_id} in search space {search_space_id}"
            )
        else:
-            # Create new document
-            document = Document(
-                search_space_id=search_space_id,
-                title=meeting_name,
-                document_type=DocumentType.CIRCLEBACK,
-                document_metadata=document_metadata,
-                content=summary_content,
-                embedding=summary_embedding,
-                chunks=chunks,
-                content_hash=content_hash,
-                unique_identifier_hash=unique_identifier_hash,
-                blocknote_document=blocknote_json,
-                content_needs_reindexing=False,
-                updated_at=get_current_timestamp(),
-                created_by_id=created_by_user_id,
-                connector_id=connector_id,
-            )
-
-            session.add(document)
-            await session.commit()
-            await session.refresh(document)
            logger.info(
-                f"Created new Circleback meeting document {meeting_id} in search space {search_space_id}"
+                f"Processed Circleback meeting document {meeting_id} in search space {search_space_id} - now ready"
            )

        return document
@ -214,8 +262,28 @@ async def add_circleback_meeting_document(
        logger.error(
            f"Database error processing Circleback meeting {meeting_id}: {db_error}"
        )
+        # Mark document as failed if it was created
+        if document is not None:
+            try:
+                document.status = DocumentStatus.failed(str(db_error))
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception as status_error:
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
        raise db_error
    except Exception as e:
        await session.rollback()
        logger.error(f"Failed to process Circleback meeting {meeting_id}: {e!s}")
+        # Mark document as failed if it was created
+        if document is not None:
+            try:
+                document.status = DocumentStatus.failed(str(e))
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception as status_error:
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
        raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e
--- a/surfsense_backend/app/tasks/document_processors/file_processors.py
+++ b/surfsense_backend/app/tasks/document_processors/file_processors.py
@ -17,7 +17,7 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import config as app_config
-from app.db import Document, DocumentType, Log, Notification
+from app.db import Document, DocumentStatus, DocumentType, Log, Notification
 from app.services.llm_service import get_user_long_context_llm
 from app.services.notification_service import NotificationService
 from app.services.task_logging_service import TaskLoggingService
@ -33,6 +33,7 @@ from .base import (
    check_document_by_unique_identifier,
    check_duplicate_document,
    get_current_timestamp,
+    safe_set_chunks,
 )
 from .markdown_processor import add_received_markdown_file_document

@ -499,6 +500,7 @@ async def add_received_file_document_using_unstructured(
            existing_document.blocknote_document = blocknote_json
            existing_document.content_needs_reindexing = False
            existing_document.updated_at = get_current_timestamp()
+            existing_document.status = DocumentStatus.ready()  # Mark as ready

            await session.commit()
            await session.refresh(existing_document)
@ -528,6 +530,7 @@ async def add_received_file_document_using_unstructured(
                updated_at=get_current_timestamp(),
                created_by_id=user_id,
                connector_id=connector.get("connector_id") if connector else None,
+                status=DocumentStatus.ready(),  # Mark as ready
            )

            session.add(document)
@ -640,6 +643,7 @@ async def add_received_file_document_using_llamacloud(
            existing_document.blocknote_document = blocknote_json
            existing_document.content_needs_reindexing = False
            existing_document.updated_at = get_current_timestamp()
+            existing_document.status = DocumentStatus.ready()  # Mark as ready

            await session.commit()
            await session.refresh(existing_document)
@ -669,6 +673,7 @@ async def add_received_file_document_using_llamacloud(
                updated_at=get_current_timestamp(),
                created_by_id=user_id,
                connector_id=connector.get("connector_id") if connector else None,
+                status=DocumentStatus.ready(),  # Mark as ready
            )

            session.add(document)
@ -806,6 +811,7 @@ async def add_received_file_document_using_docling(
            existing_document.blocknote_document = blocknote_json
            existing_document.content_needs_reindexing = False
            existing_document.updated_at = get_current_timestamp()
+            existing_document.status = DocumentStatus.ready()  # Mark as ready

            await session.commit()
            await session.refresh(existing_document)
@ -835,6 +841,7 @@ async def add_received_file_document_using_docling(
                updated_at=get_current_timestamp(),
                created_by_id=user_id,
                connector_id=connector.get("connector_id") if connector else None,
+                status=DocumentStatus.ready(),  # Mark as ready
            )

            session.add(document)
@ -1606,3 +1613,372 @@ async def process_file_in_background(

        logging.error(f"Error processing file in background: {error_message}")
        raise  # Re-raise so the wrapper can also handle it
+
+
+async def process_file_in_background_with_document(
+    document: Document,
+    file_path: str,
+    filename: str,
+    search_space_id: int,
+    user_id: str,
+    session: AsyncSession,
+    task_logger: TaskLoggingService,
+    log_entry: Log,
+    connector: dict | None = None,
+    notification: Notification | None = None,
+) -> Document | None:
+    """
+    Process file and update existing pending document (2-phase pattern).
+
+    This function is Phase 2 of the real-time document status updates:
+    - Phase 1 (API): Created document with pending status
+    - Phase 2 (this): Process file and update document to ready
+
+    The document already exists with pending status. This function:
+    1. Parses the file content (markdown/text, audio transcription, or the
+       configured ETL service: UNSTRUCTURED, LLAMACLOUD or DOCLING)
+    2. Returns early if another document already has the same content hash
+    3. Updates the document with summary, embedding, chunks and BlockNote JSON
+    4. Sets status to 'ready' on success and commits
+
+    The uploaded temp file is deleted (best effort) once parsing has
+    succeeded. If parsing raises, the file is left in place.
+
+    Args:
+        document: Existing document with pending status
+        file_path: Path to the uploaded file
+        filename: Original filename (its extension selects the parser)
+        search_space_id: ID of the search space
+        user_id: ID of the user
+        session: Database session
+        task_logger: Task logging service
+        log_entry: Log entry for this task
+        connector: Optional connector info; accepted for interface
+            compatibility but not read by this function
+        notification: Optional notification for progress updates
+
+    Returns:
+        Updated Document object if successful, None if duplicate content
+        detected. In the duplicate case this function does not modify the
+        document or its status.
+
+    Raises:
+        Exception: Any error raised during processing is re-raised after the
+            session has been rolled back and the failure has been logged via
+            ``task_logger``. This function does not set a failed status.
+    """
+    import os
+
+    from sqlalchemy.orm.attributes import flag_modified
+
+    from app.config import config as app_config
+    from app.services.llm_service import get_user_long_context_llm
+    from app.utils.blocknote_converter import convert_markdown_to_blocknote
+
+    # Read the primary key before doing any work: session.rollback() expires
+    # ORM attributes, and touching an expired attribute under AsyncSession
+    # would attempt implicit IO inside the failure handler.
+    document_id = document.id
+
+    async def _notify_parsing(stage_message: str) -> None:
+        """Send a 'parsing' progress update if a notification is attached."""
+        if notification:
+            await NotificationService.document_processing.notify_processing_progress(
+                session,
+                notification,
+                stage="parsing",
+                stage_message=stage_message,
+            )
+
+    try:
+        markdown_content = None
+        etl_service = None
+        lowered_name = filename.lower()
+
+        # ===== STEP 1: Parse file content based on type =====
+
+        if lowered_name.endswith((".md", ".markdown", ".txt")):
+            # Markdown / plain text: the file content is used as-is
+            await _notify_parsing("Reading file")
+
+            await task_logger.log_task_progress(
+                log_entry,
+                f"Processing markdown/text file: {filename}",
+                {"file_type": "markdown", "processing_stage": "reading_file"},
+            )
+
+            with open(file_path, encoding="utf-8") as f:
+                markdown_content = f.read()
+            etl_service = "MARKDOWN"
+
+        elif lowered_name.endswith(
+            (".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")
+        ):
+            # Audio: transcribe, then wrap the transcript in a markdown heading
+            await _notify_parsing("Transcribing audio")
+
+            await task_logger.log_task_progress(
+                log_entry,
+                f"Processing audio file for transcription: {filename}",
+                {"file_type": "audio", "processing_stage": "starting_transcription"},
+            )
+
+            # A "local/" prefix on STT_SERVICE selects the in-process service;
+            # anything else goes through atranscription().
+            use_local_stt = bool(
+                app_config.STT_SERVICE
+            ) and app_config.STT_SERVICE.startswith("local/")
+
+            if use_local_stt:
+                from app.services.stt_service import stt_service
+
+                result = stt_service.transcribe_file(file_path)
+                transcribed_text = result.get("text", "")
+            else:
+                with open(file_path, "rb") as audio_file:
+                    transcription_kwargs = {
+                        "model": app_config.STT_SERVICE,
+                        "file": audio_file,
+                        "api_key": app_config.STT_SERVICE_API_KEY,
+                    }
+                    if app_config.STT_SERVICE_API_BASE:
+                        transcription_kwargs["api_base"] = (
+                            app_config.STT_SERVICE_API_BASE
+                        )
+                    transcription_response = await atranscription(
+                        **transcription_kwargs
+                    )
+                transcribed_text = transcription_response.get("text", "")
+
+            if not transcribed_text:
+                raise ValueError("Transcription returned empty text")
+            markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}"
+            etl_service = "AUDIO_TRANSCRIPTION"
+
+        else:
+            # Document files - use ETL service
+            from app.services.page_limit_service import PageLimitService
+
+            page_limit_service = PageLimitService(session)
+
+            # Estimate page count; fall back to a size-based guess
+            # (one page per 80 KiB, minimum 1) if the estimator fails.
+            try:
+                estimated_pages = page_limit_service.estimate_pages_before_processing(
+                    file_path
+                )
+            except Exception:
+                file_size = os.path.getsize(file_path)
+                estimated_pages = max(1, file_size // (80 * 1024))
+
+            # Check page limit before spending time on extraction
+            await page_limit_service.check_page_limit(user_id, estimated_pages)
+
+            # Pages charged to the user once extraction succeeds
+            pages_to_charge = estimated_pages
+
+            if app_config.ETL_SERVICE == "UNSTRUCTURED":
+                await _notify_parsing("Extracting content")
+
+                from langchain_unstructured import UnstructuredLoader
+
+                loader = UnstructuredLoader(
+                    file_path,
+                    mode="elements",
+                    post_processors=[],
+                    languages=["eng"],
+                    include_orig_elements=False,
+                    include_metadata=False,
+                    strategy="auto",
+                )
+                docs = await loader.aload()
+                markdown_content = await convert_document_to_markdown(docs)
+                actual_pages = page_limit_service.estimate_pages_from_elements(docs)
+                # Charge the larger of the pre- and post-parse estimates
+                pages_to_charge = max(estimated_pages, actual_pages)
+                etl_service = "UNSTRUCTURED"
+
+            elif app_config.ETL_SERVICE == "LLAMACLOUD":
+                await _notify_parsing("Extracting content")
+
+                result = await parse_with_llamacloud_retry(
+                    file_path=file_path,
+                    estimated_pages=estimated_pages,
+                    task_logger=task_logger,
+                    log_entry=log_entry,
+                )
+                markdown_documents = await result.aget_markdown_documents(
+                    split_by_page=False
+                )
+                if not markdown_documents:
+                    raise RuntimeError(
+                        f"LlamaCloud parsing returned no documents: {filename}"
+                    )
+                markdown_content = markdown_documents[0].text
+                etl_service = "LLAMACLOUD"
+
+            elif app_config.ETL_SERVICE == "DOCLING":
+                await _notify_parsing("Extracting content")
+
+                # Suppress logging during Docling import
+                getLogger("docling.pipeline.base_pipeline").setLevel(ERROR)
+                getLogger("docling.document_converter").setLevel(ERROR)
+                getLogger(
+                    "docling_core.transforms.chunker.hierarchical_chunker"
+                ).setLevel(ERROR)
+
+                from docling.document_converter import DocumentConverter
+
+                converter = DocumentConverter()
+                result = converter.convert(file_path)
+                markdown_content = result.document.export_to_markdown()
+                etl_service = "DOCLING"
+
+            else:
+                raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}")
+
+            # Update page usage (only reached when extraction did not raise)
+            await page_limit_service.update_page_usage(
+                user_id, pages_to_charge, allow_exceed=True
+            )
+
+        # Clean up temp file. Reached only when parsing succeeded; deletion is
+        # best effort, so a failure to remove the file never fails the task.
+        with contextlib.suppress(Exception):
+            os.unlink(file_path)
+
+        if not markdown_content:
+            raise RuntimeError(f"Failed to extract content from file: {filename}")
+
+        # ===== STEP 2: Check for duplicate content =====
+        content_hash = generate_content_hash(markdown_content, search_space_id)
+
+        existing_by_content = await check_duplicate_document(session, content_hash)
+        if existing_by_content and existing_by_content.id != document_id:
+            # Duplicate content found - log it and return None. The pending
+            # document is NOT modified here.
+            # NOTE(review): presumably the caller handles the pending
+            # document in this case; confirm against the call site.
+            logging.info(
+                f"Duplicate content detected for {filename}, "
+                f"matches document {existing_by_content.id}"
+            )
+            return None
+
+        # ===== STEP 3: Generate embeddings and chunks =====
+        if notification:
+            await NotificationService.document_processing.notify_processing_progress(
+                session, notification, stage="chunking"
+            )
+
+        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+
+        if user_llm:
+            document_metadata = {
+                "file_name": filename,
+                "etl_service": etl_service,
+                "document_type": "File Document",
+            }
+            summary_content, summary_embedding = await generate_document_summary(
+                markdown_content, user_llm, document_metadata
+            )
+        else:
+            # Fallback: use truncated content as summary
+            summary_content = markdown_content[:4000]
+            summary_embedding = app_config.embedding_model_instance.embed(
+                summary_content
+            )
+
+        chunks = await create_document_chunks(markdown_content)
+
+        # Convert to BlockNote for editing
+        blocknote_json = await convert_markdown_to_blocknote(markdown_content)
+
+        # ===== STEP 4: Update document to READY =====
+        document.title = filename
+        document.content = summary_content
+        document.content_hash = content_hash
+        document.embedding = summary_embedding
+        # NOTE(review): the existing metadata is unpacked last, so keys that
+        # are already present on the document win over FILE_NAME/ETL_SERVICE.
+        # Confirm this precedence is intended.
+        document.document_metadata = {
+            "FILE_NAME": filename,
+            "ETL_SERVICE": etl_service or "UNKNOWN",
+            **(document.document_metadata or {}),
+        }
+        flag_modified(document, "document_metadata")
+
+        # Use safe_set_chunks to avoid async issues
+        safe_set_chunks(document, chunks)
+
+        document.blocknote_document = blocknote_json
+        document.content_needs_reindexing = False
+        document.updated_at = get_current_timestamp()
+        document.status = DocumentStatus.ready()  # Shows checkmark in UI
+
+        await session.commit()
+        await session.refresh(document)
+
+        await task_logger.log_task_success(
+            log_entry,
+            f"Successfully processed file: {filename}",
+            {
+                "document_id": document_id,
+                "content_hash": content_hash,
+                "file_type": etl_service,
+                "chunks_count": len(chunks),
+            },
+        )
+
+        return document
+
+    except Exception as e:
+        await session.rollback()
+
+        from app.services.page_limit_service import PageLimitExceededError
+
+        # Page-limit errors carry a user-facing message; everything else gets
+        # a generic one (the raw error text still goes to the task log below).
+        if isinstance(e, PageLimitExceededError):
+            error_message = str(e)
+        elif isinstance(e, HTTPException) and "page limit" in str(e.detail).lower():
+            error_message = str(e.detail)
+        else:
+            error_message = f"Failed to process file: {filename}"
+
+        await task_logger.log_task_failure(
+            log_entry,
+            error_message,
+            str(e),
+            {
+                "error_type": type(e).__name__,
+                "filename": filename,
+                # Captured before the try block; see the note at the top.
+                "document_id": document_id,
+            },
+        )
+        logging.error(f"Error processing file with document: {error_message}")
+        raise
--- a/surfsense_backend/app/tasks/document_processors/markdown_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/markdown_processor.py
@ -7,7 +7,7 @@ import logging
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

-from app.db import Document, DocumentType
+from app.db import Document, DocumentStatus, DocumentType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -270,6 +270,7 @@ async def add_received_markdown_file_document(
            existing_document.chunks = chunks
            existing_document.blocknote_document = blocknote_json
            existing_document.updated_at = get_current_timestamp()
+            existing_document.status = DocumentStatus.ready()  # Mark as ready

            await session.commit()
            await session.refresh(existing_document)
@ -297,6 +298,7 @@ async def add_received_markdown_file_document(
                updated_at=get_current_timestamp(),
                created_by_id=user_id,
                connector_id=connector.get("connector_id") if connector else None,
+                status=DocumentStatus.ready(),  # Mark as ready
            )

            session.add(document)
--- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py
@ -1,5 +1,9 @@
 """
 YouTube video document processor.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create document with 'pending' status (visible in UI immediately)
+- Phase 2: Process document: pending → processing → ready/failed
 """

 import logging
@ -12,7 +16,7 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 from youtube_transcript_api import YouTubeTranscriptApi

-from app.db import Document, DocumentType
+from app.db import Document, DocumentStatus, DocumentType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@ -26,6 +30,7 @@ from app.utils.proxy_config import get_requests_proxies
 from .base import (
    check_document_by_unique_identifier,
    get_current_timestamp,
+    safe_set_chunks,
 )


@ -61,6 +66,10 @@ async def add_youtube_video_document(
    """
    Process a YouTube video URL, extract transcripts, and store as a document.

+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Create document with 'pending' status (visible in UI immediately)
+    - Phase 2: Process document: pending → processing → ready/failed
+
    Args:
        session: Database session for storing the document
        url: YouTube video URL (supports standard, shortened, and embed formats)
@ -85,15 +94,18 @@ async def add_youtube_video_document(
        metadata={"url": url, "user_id": str(user_id)},
    )

+    document = None
+    video_id = None
+    is_new_document = False
+
    try:
-        # Extract video ID from URL
+        # Extract video ID from URL (lightweight operation)
        await task_logger.log_task_progress(
            log_entry,
            f"Extracting video ID from URL: {url}",
            {"stage": "video_id_extraction"},
        )

-        # Get video ID
        video_id = get_youtube_video_id(url)
        if not video_id:
            raise ValueError(f"Could not extract video ID from URL: {url}")
@ -104,13 +116,87 @@ async def add_youtube_video_document(
            {"stage": "video_id_extracted", "video_id": video_id},
        )

-        # Get video metadata
+        # Generate unique identifier hash for this YouTube video
+        unique_identifier_hash = generate_unique_identifier_hash(
+            DocumentType.YOUTUBE_VIDEO, video_id, search_space_id
+        )
+
+        # Check if document with this unique identifier already exists
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Checking for existing video: {video_id}",
+            {"stage": "duplicate_check", "video_id": video_id},
+        )
+
+        existing_document = await check_document_by_unique_identifier(
+            session, unique_identifier_hash
+        )
+
+        # =======================================================================
+        # PHASE 1: Create pending document or prepare existing for update
+        # =======================================================================
+        if existing_document:
+            document = existing_document
+            is_new_document = False
+            # Check if already being processed
+            if DocumentStatus.is_state(
+                existing_document.status, DocumentStatus.PENDING
+            ):
+                logging.info(
+                    f"YouTube video {video_id} already pending. Returning existing."
+                )
+                return existing_document
+            if DocumentStatus.is_state(
+                existing_document.status, DocumentStatus.PROCESSING
+            ):
+                logging.info(
+                    f"YouTube video {video_id} already processing. Returning existing."
+                )
+                return existing_document
+        else:
+            # Create new document with PENDING status (visible in UI immediately)
+            await task_logger.log_task_progress(
+                log_entry,
+                f"Creating pending document for video: {video_id}",
+                {"stage": "pending_document_creation"},
+            )
+
+            document = Document(
+                title=f"YouTube Video: {video_id}",  # Placeholder title
+                document_type=DocumentType.YOUTUBE_VIDEO,
+                document_metadata={
+                    "url": url,
+                    "video_id": video_id,
+                },
+                content="Processing video...",  # Placeholder content
+                content_hash=unique_identifier_hash,  # Temporary unique value
+                unique_identifier_hash=unique_identifier_hash,
+                embedding=None,
+                chunks=[],  # Empty at creation
+                status=DocumentStatus.pending(),  # PENDING status - visible in UI
+                search_space_id=search_space_id,
+                updated_at=get_current_timestamp(),
+                created_by_id=user_id,
+            )
+            session.add(document)
+            await session.commit()  # Document visible in UI now with pending status!
+            is_new_document = True
+
+            logging.info(f"Created pending document for YouTube video {video_id}")
+
+        # =======================================================================
+        # PHASE 2: Set to PROCESSING and do heavy work
+        # =======================================================================
+        document.status = DocumentStatus.processing()
+        await session.commit()  # UI shows "processing" status
+
        await task_logger.log_task_progress(
            log_entry,
            f"Fetching video metadata for: {video_id}",
            {"stage": "metadata_fetch"},
        )

+        # Fetch video metadata
        params = {
            "format": "json",
            "url": f"https://www.youtube.com/watch?v={video_id}",
@ -130,6 +216,10 @@ async def add_youtube_video_document(
        ):
            video_data = await response.json()

+        # Update title immediately for better UX (user sees actual title sooner)
+        document.title = video_data.get("title", f"YouTube Video: {video_id}")
+        await session.commit()
+
        await task_logger.log_task_progress(
            log_entry,
            f"Video metadata fetched: {video_data.get('title', 'Unknown')}",
@ -219,53 +309,28 @@ async def add_youtube_video_document(
        document_parts.append("</DOCUMENT>")
        combined_document_string = "\n".join(document_parts)

-        # Generate unique identifier hash for this YouTube video
-        unique_identifier_hash = generate_unique_identifier_hash(
-            DocumentType.YOUTUBE_VIDEO, video_id, search_space_id
-        )
-
        # Generate content hash
        content_hash = generate_content_hash(combined_document_string, search_space_id)

-        # Check if document with this unique identifier already exists
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Checking for existing video: {video_id}",
-            {"stage": "duplicate_check", "video_id": video_id},
-        )
+        # For existing documents, check if content has changed
+        if not is_new_document and existing_document.content_hash == content_hash:
+            await task_logger.log_task_success(
+                log_entry,
+                f"YouTube video document unchanged: {video_data.get('title', 'YouTube Video')}",
+                {
+                    "duplicate_detected": True,
+                    "existing_document_id": existing_document.id,
+                    "video_id": video_id,
+                },
+            )
+            logging.info(
+                f"Document for YouTube video {video_id} unchanged. Marking as ready."
+            )
+            document.status = DocumentStatus.ready()
+            await session.commit()
+            return document

-        existing_document = await check_document_by_unique_identifier(
-            session, unique_identifier_hash
-        )
-
-        if existing_document:
-            # Document exists - check if content has changed
-            if existing_document.content_hash == content_hash:
-                await task_logger.log_task_success(
-                    log_entry,
-                    f"YouTube video document unchanged: {video_data.get('title', 'YouTube Video')}",
-                    {
-                        "duplicate_detected": True,
-                        "existing_document_id": existing_document.id,
-                        "video_id": video_id,
-                    },
-                )
-                logging.info(
-                    f"Document for YouTube video {video_id} unchanged. Skipping."
-                )
-                return existing_document
-            else:
-                # Content has changed - update the existing document
-                logging.info(
-                    f"Content changed for YouTube video {video_id}. Updating document."
-                )
-                await task_logger.log_task_progress(
-                    log_entry,
-                    f"Updating YouTube video document: {video_data.get('title', 'YouTube Video')}",
-                    {"stage": "document_update", "video_id": video_id},
-                )
-
-        # Get LLM for summary generation (needed for both create and update)
+        # Get LLM for summary generation
        await task_logger.log_task_progress(
            log_entry,
            f"Preparing for summary generation: {video_data.get('title', 'YouTube Video')}",
@ -287,7 +352,7 @@ async def add_youtube_video_document(
        )

        # Generate summary with metadata
-        document_metadata = {
+        document_metadata_for_summary = {
            "url": url,
            "video_id": video_id,
            "title": video_data.get("title", "YouTube Video"),
@ -297,7 +362,7 @@ async def add_youtube_video_document(
            "has_transcript": "No captions available" not in transcript_text,
        }
        summary_content, summary_embedding = await generate_document_summary(
-            combined_document_string, user_llm, document_metadata
+            combined_document_string, user_llm, document_metadata_for_summary
        )

        # Process chunks
@ -319,65 +384,33 @@ async def add_youtube_video_document(

        chunks = await create_document_chunks(combined_document_string)

-        # Update or create document
-        if existing_document:
-            # Update existing document
-            await task_logger.log_task_progress(
-                log_entry,
-                f"Updating YouTube video document in database: {video_data.get('title', 'YouTube Video')}",
-                {"stage": "document_update", "chunks_count": len(chunks)},
-            )
+        # =======================================================================
+        # PHASE 3: Update document to READY with all content
+        # =======================================================================
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Finalizing document: {video_data.get('title', 'YouTube Video')}",
+            {"stage": "document_finalization", "chunks_count": len(chunks)},
+        )

-            existing_document.title = video_data.get("title", "YouTube Video")
-            existing_document.content = summary_content
-            existing_document.content_hash = content_hash
-            existing_document.embedding = summary_embedding
-            existing_document.document_metadata = {
-                "url": url,
-                "video_id": video_id,
-                "video_title": video_data.get("title", "YouTube Video"),
-                "author": video_data.get("author_name", "Unknown"),
-                "thumbnail": video_data.get("thumbnail_url", ""),
-            }
-            existing_document.chunks = chunks
-            existing_document.blocknote_document = blocknote_json
-            existing_document.updated_at = get_current_timestamp()
+        document.title = video_data.get("title", "YouTube Video")
+        document.content = summary_content
+        document.content_hash = content_hash
+        document.embedding = summary_embedding
+        document.document_metadata = {
+            "url": url,
+            "video_id": video_id,
+            "video_title": video_data.get("title", "YouTube Video"),
+            "author": video_data.get("author_name", "Unknown"),
+            "thumbnail": video_data.get("thumbnail_url", ""),
+        }
+        safe_set_chunks(document, chunks)
+        document.blocknote_document = blocknote_json
+        document.status = DocumentStatus.ready()  # READY status - fully processed
+        document.updated_at = get_current_timestamp()

-            await session.commit()
-            await session.refresh(existing_document)
-            document = existing_document
-        else:
-            # Create new document
-            await task_logger.log_task_progress(
-                log_entry,
-                f"Creating YouTube video document in database: {video_data.get('title', 'YouTube Video')}",
-                {"stage": "document_creation", "chunks_count": len(chunks)},
-            )
-
-            document = Document(
-                title=video_data.get("title", "YouTube Video"),
-                document_type=DocumentType.YOUTUBE_VIDEO,
-                document_metadata={
-                    "url": url,
-                    "video_id": video_id,
-                    "video_title": video_data.get("title", "YouTube Video"),
-                    "author": video_data.get("author_name", "Unknown"),
-                    "thumbnail": video_data.get("thumbnail_url", ""),
-                },
-                content=summary_content,
-                embedding=summary_embedding,
-                chunks=chunks,
-                search_space_id=search_space_id,
-                content_hash=content_hash,
-                unique_identifier_hash=unique_identifier_hash,
-                blocknote_document=blocknote_json,
-                updated_at=get_current_timestamp(),
-                created_by_id=user_id,
-            )
-
-            session.add(document)
-            await session.commit()
-            await session.refresh(document)
+        await session.commit()
+        await session.refresh(document)

        # Log success
        await task_logger.log_task_success(
@ -395,27 +428,51 @@ async def add_youtube_video_document(
        )

        return document
+
    except SQLAlchemyError as db_error:
-        await session.rollback()
+        # Mark document as failed if it exists
+        if document:
+            try:
+                document.status = DocumentStatus.failed(
+                    f"Database error: {str(db_error)[:150]}"
+                )
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception:
+                await session.rollback()
+        else:
+            await session.rollback()
+
        await task_logger.log_task_failure(
            log_entry,
            f"Database error while processing YouTube video: {url}",
            str(db_error),
            {
                "error_type": "SQLAlchemyError",
-                "video_id": video_id if "video_id" in locals() else None,
+                "video_id": video_id,
            },
        )
        raise db_error
+
    except Exception as e:
-        await session.rollback()
+        # Mark document as failed if it exists
+        if document:
+            try:
+                document.status = DocumentStatus.failed(str(e)[:200])
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception:
+                await session.rollback()
+        else:
+            await session.rollback()
+
        await task_logger.log_task_failure(
            log_entry,
            f"Failed to process YouTube video: {url}",
            str(e),
            {
                "error_type": type(e).__name__,
-                "video_id": video_id if "video_id" in locals() else None,
+                "video_id": video_id,
            },
        )
        logging.error(f"Failed to process YouTube video: {e!s}")
--- a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx
@ -13,6 +13,7 @@ import {
 	llmPreferencesAtom,
 } from "@/atoms/new-llm-config/new-llm-config-query.atoms";
 import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
+import { ConnectorIndicator } from "@/components/assistant-ui/connector-popup";
 import { DocumentUploadDialogProvider } from "@/components/assistant-ui/document-upload-popup";
 import { DashboardBreadcrumb } from "@/components/dashboard-breadcrumb";
 import { LayoutDataProvider } from "@/components/layout";
@ -192,6 +193,8 @@ export function DashboardClientLayout({
 			<LayoutDataProvider searchSpaceId={searchSpaceId} breadcrumb={<DashboardBreadcrumb />}>
 				{children}
 			</LayoutDataProvider>
+			{/* Global connector dialog - triggered from documents page */}
+			<ConnectorIndicator hideTrigger />
 		</DocumentUploadDialogProvider>
 	);
 }
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
@ -1,10 +1,12 @@
 "use client";

 import type React from "react";
+import { useRef, useState, useEffect } from "react";
 import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
+import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";

-export function getDocumentTypeIcon(type: string): React.ReactNode {
-	return getConnectorIcon(type);
+/** Return the icon for a document type by delegating to getConnectorIcon, forwarding the optional className. */
+export function getDocumentTypeIcon(type: string, className?: string): React.ReactNode {
+	return getConnectorIcon(type, className);
 }

 export function getDocumentTypeLabel(type: string): string {
@ -15,16 +17,43 @@ export function getDocumentTypeLabel(type: string): string {
 }

 export function DocumentTypeChip({ type, className }: { type: string; className?: string }) {
-	const icon = getDocumentTypeIcon(type);
-	return (
-		<span
-			className={
-				"inline-flex items-center gap-1.5 rounded-full border border-border bg-primary/5 px-2 py-1 text-xs font-medium " +
-				(className ?? "")
+	const icon = getDocumentTypeIcon(type, "h-4 w-4");
+	const fullLabel = getDocumentTypeLabel(type);
+	const textRef = useRef<HTMLSpanElement>(null);
+	const [isTruncated, setIsTruncated] = useState(false);
+
+	useEffect(() => {
+		const checkTruncation = () => {
+			if (textRef.current) {
+				setIsTruncated(textRef.current.scrollWidth > textRef.current.clientWidth);
 			}
+		};
+		checkTruncation();
+		window.addEventListener("resize", checkTruncation);
+		return () => window.removeEventListener("resize", checkTruncation);
+	}, []);
+
+	const chip = (
+		<span
+			className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground max-w-full overflow-hidden ${className ?? ""}`}
 		>
-			<span className="text-primary">{icon}</span>
-			{getDocumentTypeLabel(type)}
+			<span className="opacity-80 flex-shrink-0">{icon}</span>
+			<span ref={textRef} className="truncate min-w-0">
+				{fullLabel}
+			</span>
 		</span>
 	);
+
+	if (isTruncated) {
+		return (
+			<Tooltip>
+				<TooltipTrigger asChild>{chip}</TooltipTrigger>
+				<TooltipContent side="top" className="max-w-xs">
+					<p>{fullLabel}</p>
+				</TooltipContent>
+			</Tooltip>
+		);
+	}
+
+	return chip;
 }
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
@ -1,9 +1,21 @@
 "use client";

-import { CircleAlert, CircleX, Columns3, Filter, ListFilter, Trash } from "lucide-react";
-import { AnimatePresence, motion, type Variants } from "motion/react";
+import { useSetAtom } from "jotai";
+import {
+	CircleAlert,
+	CircleX,
+	FilePlus2,
+	FileType,
+	ListFilter,
+	Search,
+	SlidersHorizontal,
+	Trash,
+} from "lucide-react";
+import { motion } from "motion/react";
 import { useTranslations } from "next-intl";
-import React, { useMemo, useRef } from "react";
+import React, { useMemo, useRef, useState } from "react";
+import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms";
+import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
 import {
 	AlertDialog,
 	AlertDialogAction,
@ -17,24 +29,10 @@ import {
 } from "@/components/ui/alert-dialog";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
-import {
-	DropdownMenu,
-	DropdownMenuCheckboxItem,
-	DropdownMenuContent,
-	DropdownMenuLabel,
-	DropdownMenuTrigger,
-} from "@/components/ui/dropdown-menu";
 import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
 import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
 import type { DocumentTypeEnum } from "@/contracts/types/document.types";
-import type { ColumnVisibility } from "./types";
-
-const fadeInScale: Variants = {
-	hidden: { opacity: 0, scale: 0.95 },
-	visible: { opacity: 1, scale: 1, transition: { type: "spring", stiffness: 300, damping: 30 } },
-	exit: { opacity: 0, scale: 0.95, transition: { duration: 0.15 } },
-};
+import { getDocumentTypeIcon, getDocumentTypeLabel } from "./DocumentTypeIcon";

 export function DocumentsFilters({
 	typeCounts: typeCountsRecord,
@ -44,8 +42,6 @@ export function DocumentsFilters({
 	onBulkDelete,
 	onToggleType,
 	activeTypes,
-	columnVisibility,
-	onToggleColumn,
 }: {
 	typeCounts: Partial<Record<DocumentTypeEnum, number>>;
 	selectedIds: Set<number>;
@ -54,17 +50,27 @@ export function DocumentsFilters({
 	onBulkDelete: () => Promise<void>;
 	onToggleType: (type: DocumentTypeEnum, checked: boolean) => void;
 	activeTypes: DocumentTypeEnum[];
-	columnVisibility: ColumnVisibility;
-	onToggleColumn: (id: keyof ColumnVisibility, checked: boolean) => void;
 }) {
 	const t = useTranslations("documents");
 	const id = React.useId();
 	const inputRef = useRef<HTMLInputElement>(null);

+	// Dialog hooks for action buttons
+	const { openDialog: openUploadDialog } = useDocumentUploadDialog();
+	const setConnectorDialogOpen = useSetAtom(connectorDialogOpenAtom);
+
+	const [typeSearchQuery, setTypeSearchQuery] = useState("");
+
 	const uniqueTypes = useMemo(() => {
 		return Object.keys(typeCountsRecord).sort() as DocumentTypeEnum[];
 	}, [typeCountsRecord]);

+	const filteredTypes = useMemo(() => {
+		if (!typeSearchQuery.trim()) return uniqueTypes;
+		const query = typeSearchQuery.toLowerCase();
+		return uniqueTypes.filter((type) => getDocumentTypeLabel(type).toLowerCase().includes(query));
+	}, [uniqueTypes, typeSearchQuery]);
+
 	const typeCounts = useMemo(() => {
 		const map = new Map<string, number>();
 		for (const [type, count] of Object.entries(typeCountsRecord)) {
@ -75,202 +81,233 @@ export function DocumentsFilters({

 	return (
 		<motion.div
-			className="flex flex-wrap items-center justify-start gap-3 w-full"
+			className="flex flex-col gap-4"
 			initial={{ opacity: 0, y: 10 }}
 			animate={{ opacity: 1, y: 0 }}
 			transition={{ type: "spring", stiffness: 300, damping: 30, delay: 0.1 }}
 		>
-			<div className="flex items-center gap-3 flex-wrap w-full sm:w-auto">
+			{/* Main toolbar row */}
+			<div className="flex flex-wrap items-center gap-3">
+				{/* Action Buttons - Left Side */}
+				<div className="flex items-center gap-2">
+					<Button
+						onClick={openUploadDialog}
+						variant="outline"
+						size="sm"
+						className="h-9 gap-2 bg-white text-gray-700 border-white hover:bg-gray-50 dark:bg-white dark:text-gray-800 dark:hover:bg-gray-100"
+					>
+						<FilePlus2 size={16} />
+						<span>Upload documents</span>
+					</Button>
+					<Button
+						onClick={() => setConnectorDialogOpen(true)}
+						variant="outline"
+						size="sm"
+						className="h-9 gap-2 bg-white text-gray-700 border-white hover:bg-gray-50 dark:bg-white dark:text-gray-800 dark:hover:bg-gray-100"
+					>
+						<SlidersHorizontal size={16} />
+						<span>Manage connectors</span>
+					</Button>
+				</div>
+
+				{/* Spacer */}
+				<div className="flex-1" />
+
+				{/* Search Input */}
 				<motion.div
-					className="relative w-full sm:w-auto"
+					className="relative w-[180px]"
 					initial={{ opacity: 0, y: -10 }}
 					animate={{ opacity: 1, y: 0 }}
 					transition={{ type: "spring", stiffness: 300, damping: 30 }}
 				>
+					<div className="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3 text-muted-foreground">
+						<ListFilter size={14} aria-hidden="true" />
+					</div>
 					<Input
 						id={`${id}-input`}
 						ref={inputRef}
-						className="peer w-full sm:min-w-60 ps-9"
+						className="peer h-9 w-full pl-9 pr-9 text-sm bg-background border-border/60 focus-visible:ring-1 focus-visible:ring-ring/30"
 						value={searchValue}
 						onChange={(e) => onSearch(e.target.value)}
-						placeholder={t("filter_placeholder")}
+						placeholder="Filter by title"
 						type="text"
 						aria-label={t("filter_placeholder")}
 					/>
-					<motion.div
-						className="pointer-events-none absolute inset-y-0 start-0 flex items-center justify-center ps-3 text-muted-foreground/80 peer-disabled:opacity-50"
-						initial={{ scale: 0.8 }}
-						animate={{ scale: 1 }}
-						transition={{ delay: 0.1 }}
-					>
-						<ListFilter size={16} strokeWidth={2} aria-hidden="true" />
-					</motion.div>
 					{Boolean(searchValue) && (
 						<motion.button
-							className="absolute inset-y-0 end-0 flex h-full w-9 items-center justify-center rounded-e-lg text-muted-foreground/80 outline-offset-2 transition-colors hover:text-foreground focus:z-10 focus-visible:outline focus-visible:outline-ring/70"
+							className="absolute inset-y-0 right-0 flex h-full w-9 items-center justify-center rounded-r-md text-muted-foreground/60 hover:text-foreground transition-colors"
 							aria-label="Clear filter"
 							onClick={() => {
 								onSearch("");
 								inputRef.current?.focus();
 							}}
-							initial={{ opacity: 0, rotate: -90 }}
-							animate={{ opacity: 1, rotate: 0 }}
-							exit={{ opacity: 0, rotate: 90 }}
+							initial={{ opacity: 0, scale: 0.8 }}
+							animate={{ opacity: 1, scale: 1 }}
+							exit={{ opacity: 0, scale: 0.8 }}
 							whileHover={{ scale: 1.1 }}
 							whileTap={{ scale: 0.9 }}
 						>
-							<CircleX size={16} strokeWidth={2} aria-hidden="true" />
+							<CircleX size={14} strokeWidth={2} aria-hidden="true" />
 						</motion.button>
 					)}
 				</motion.div>

-				<Popover>
-					<PopoverTrigger asChild>
-						<motion.div
-							whileHover={{ scale: 1.05 }}
-							whileTap={{ scale: 0.95 }}
-							transition={{ type: "spring", stiffness: 400, damping: 17 }}
-						>
-							<Button variant="outline">
-								<Filter
-									className="-ms-1 me-2 opacity-60"
-									size={16}
-									strokeWidth={2}
-									aria-hidden="true"
-								/>
-								Type
+				{/* Filter Buttons Group */}
+				<div className="flex items-center gap-2 flex-wrap">
+					{/* Type Filter */}
+					<Popover>
+						<PopoverTrigger asChild>
+							<Button
+								variant="outline"
+								size="sm"
+								className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
+							>
+								<FileType size={14} className="text-muted-foreground" />
+								<span className="hidden sm:inline">Type</span>
 								{activeTypes.length > 0 && (
-									<motion.span
-										initial={{ scale: 0.8 }}
-										animate={{ scale: 1 }}
-										className="-me-1 ms-3 inline-flex h-5 max-h-full items-center rounded border border-border bg-background px-1 text-[0.625rem] font-medium text-muted-foreground/70"
-									>
+									<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
 										{activeTypes.length}
-									</motion.span>
+									</span>
 								)}
 							</Button>
-						</motion.div>
-					</PopoverTrigger>
-					<PopoverContent className="min-w-36 p-3" align="start">
-						<motion.div initial="hidden" animate="visible" exit="exit" variants={fadeInScale}>
-							<div className="space-y-3">
-								<div className="text-xs font-medium text-muted-foreground">Filters</div>
-								<div className="space-y-3">
-									<AnimatePresence>
-										{uniqueTypes.map((value: DocumentTypeEnum, i) => (
-											<motion.div
+						</PopoverTrigger>
+						<PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
+							<div>
+								{/* Search input */}
+								<div className="p-2 border-b border-border/50">
+									<div className="relative">
+										<Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
+										<Input
+											placeholder="Search types..."
+											value={typeSearchQuery}
+											onChange={(e) => setTypeSearchQuery(e.target.value)}
+											className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0"
+										/>
+									</div>
+								</div>
+
+								<div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
+									{filteredTypes.length === 0 ? (
+										<div className="py-6 text-center text-sm text-muted-foreground">
+											No types found
+										</div>
+									) : (
+										filteredTypes.map((value: DocumentTypeEnum, i) => (
+											<div
 												key={value}
-												className="flex items-center gap-2"
-												initial={{ opacity: 0, y: -5 }}
-												animate={{ opacity: 1, y: 0 }}
-												exit={{ opacity: 0, y: 5 }}
-												transition={{ delay: i * 0.05 }}
+												role="button"
+												tabIndex={0}
+												className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
+												onClick={() => onToggleType(value, !activeTypes.includes(value))}
+												onKeyDown={(e) => {
+													if (e.key === "Enter" || e.key === " ") {
+														e.preventDefault();
+														onToggleType(value, !activeTypes.includes(value));
+													}
+												}}
 											>
+												{/* Icon */}
+												<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80">
+													{getDocumentTypeIcon(value, "h-4 w-4")}
+												</div>
+												{/* Text content */}
+												<div className="flex flex-col min-w-0 flex-1 gap-0.5">
+													<span className="text-[13px] font-medium text-foreground truncate leading-tight">
+														{getDocumentTypeLabel(value)}
+													</span>
+													<span className="text-[11px] text-muted-foreground leading-tight">
+														{typeCounts.get(value)} document
+														{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
+													</span>
+												</div>
+												{/* Checkbox */}
 												<Checkbox
 													id={`${id}-${i}`}
 													checked={activeTypes.includes(value)}
 													onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
+													className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
 												/>
-												<Label
-													htmlFor={`${id}-${i}`}
-													className="flex grow justify-between gap-2 font-normal"
-												>
-													{value}{" "}
-													<span className="ms-2 text-xs text-muted-foreground">
-														{typeCounts.get(value)}
-													</span>
-												</Label>
-											</motion.div>
-										))}
-									</AnimatePresence>
+											</div>
+										))
+									)}
 								</div>
+								{activeTypes.length > 0 && (
+									<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50">
+										<Button
+											variant="ghost"
+											size="sm"
+											className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
+											onClick={() => {
+												activeTypes.forEach((t) => {
+													onToggleType(t, false);
+												});
+											}}
+										>
+											Clear filters
+										</Button>
+									</div>
+								)}
 							</div>
-						</motion.div>
-					</PopoverContent>
-				</Popover>
+						</PopoverContent>
+					</Popover>

-				<DropdownMenu>
-					<DropdownMenuTrigger asChild>
-						<motion.div
-							whileHover={{ scale: 1.05 }}
-							whileTap={{ scale: 0.95 }}
-							transition={{ type: "spring", stiffness: 400, damping: 17 }}
-						>
-							<Button variant="outline">
-								<Columns3
-									className="-ms-1 me-2 opacity-60"
-									size={16}
-									strokeWidth={2}
-									aria-hidden="true"
-								/>
-								View
-							</Button>
-						</motion.div>
-					</DropdownMenuTrigger>
-					<DropdownMenuContent align="end">
-						<DropdownMenuLabel>Toggle columns</DropdownMenuLabel>
-						{(
-							[
-								["title", "Title"],
-								["document_type", "Type"],
-								["content", "Content"],
-								["created_at", "Created At"],
-							] as Array<[keyof ColumnVisibility, string]>
-						).map(([key, label]) => (
-							<DropdownMenuCheckboxItem
-								key={key}
-								className="capitalize"
-								checked={columnVisibility[key]}
-								onCheckedChange={(v) => onToggleColumn(key, !!v)}
-								onSelect={(e) => e.preventDefault()}
-							>
-								{label}
-							</DropdownMenuCheckboxItem>
-						))}
-					</DropdownMenuContent>
-				</DropdownMenu>
-			</div>
-
-			<div className="flex items-center gap-3 w-full sm:w-auto sm:ml-auto">
-				{selectedIds.size > 0 && (
-					<AlertDialog>
-						<AlertDialogTrigger asChild>
-							<Button className="w-full sm:w-auto" variant="outline">
-								<Trash
-									className="-ms-1 me-2 opacity-60"
-									size={16}
-									strokeWidth={2}
-									aria-hidden="true"
-								/>
-								Delete
-								<span className="-me-1 ms-3 inline-flex h-5 max-h-full items-center rounded border border-border bg-background px-1 text-[0.625rem] font-medium text-muted-foreground/70">
-									{selectedIds.size}
-								</span>
-							</Button>
-						</AlertDialogTrigger>
-						<AlertDialogContent>
-							<div className="flex flex-col gap-2 max-sm:items-center sm:flex-row sm:gap-4">
-								<div
-									className="flex size-9 shrink-0 items-center justify-center rounded-full border border-border"
-									aria-hidden="true"
+					{/* Bulk Delete Button */}
+					{selectedIds.size > 0 && (
+						<AlertDialog>
+							<AlertDialogTrigger asChild>
+								<motion.div
+									initial={{ opacity: 0, scale: 0.9 }}
+									animate={{ opacity: 1, scale: 1 }}
+									exit={{ opacity: 0, scale: 0.9 }}
 								>
-									<CircleAlert className="opacity-80" size={16} strokeWidth={2} />
+									{/* Mobile: icon with count */}
+									<Button variant="destructive" size="sm" className="h-9 gap-1.5 px-2.5 md:hidden">
+										<Trash size={14} />
+										<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
+											{selectedIds.size}
+										</span>
+									</Button>
+									{/* Desktop: full button */}
+									<Button variant="destructive" size="sm" className="h-9 gap-2 hidden md:flex">
+										<Trash size={14} />
+										Delete
+										<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
+											{selectedIds.size}
+										</span>
+									</Button>
+								</motion.div>
+							</AlertDialogTrigger>
+							<AlertDialogContent className="max-w-md">
+								<div className="flex flex-col gap-2 sm:flex-row sm:gap-4">
+									<div
+										className="flex size-10 shrink-0 items-center justify-center rounded-full bg-destructive/10 text-destructive"
+										aria-hidden="true"
+									>
+										<CircleAlert size={18} strokeWidth={2} />
+									</div>
+									<AlertDialogHeader className="flex-1">
+										<AlertDialogTitle>
+											Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?
+										</AlertDialogTitle>
+										<AlertDialogDescription>
+											This action cannot be undone. This will permanently delete the selected{" "}
+											{selectedIds.size === 1 ? "document" : "documents"} from your search space.
+										</AlertDialogDescription>
+									</AlertDialogHeader>
 								</div>
-								<AlertDialogHeader>
-									<AlertDialogTitle>Are you absolutely sure?</AlertDialogTitle>
-									<AlertDialogDescription>
-										This action cannot be undone. This will permanently delete {selectedIds.size}{" "}
-										selected {selectedIds.size === 1 ? "row" : "rows"}.
-									</AlertDialogDescription>
-								</AlertDialogHeader>
-							</div>
-							<AlertDialogFooter>
-								<AlertDialogCancel>Cancel</AlertDialogCancel>
-								<AlertDialogAction onClick={onBulkDelete}>Delete</AlertDialogAction>
-							</AlertDialogFooter>
-						</AlertDialogContent>
-					</AlertDialog>
-				)}
+								<AlertDialogFooter>
+									<AlertDialogCancel>Cancel</AlertDialogCancel>
+									<AlertDialogAction
+										onClick={onBulkDelete}
+										className="bg-destructive text-destructive-foreground hover:bg-destructive/90"
+									>
+										Delete
+									</AlertDialogAction>
+								</AlertDialogFooter>
+							</AlertDialogContent>
+						</AlertDialog>
+					)}
+				</div>
 			</div>
 		</motion.div>
 	);
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@ -1,14 +1,30 @@
 "use client";

-import { ChevronDown, ChevronUp, FileX, Plus } from "lucide-react";
+import { formatDistanceToNow } from "date-fns";
+import {
+	AlertCircle,
+	Calendar,
+	CheckCircle2,
+	ChevronDown,
+	ChevronUp,
+	Clock,
+	FileText,
+	FileX,
+	Loader2,
+	Network,
+	Plus,
+	User,
+} from "lucide-react";
 import { motion } from "motion/react";
-import { useParams } from "next/navigation";
 import { useTranslations } from "next-intl";
-import React from "react";
+import React, { useRef, useState, useEffect, useCallback } from "react";
 import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
-import { DocumentViewer } from "@/components/document-viewer";
+import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
+import { MarkdownViewer } from "@/components/markdown-viewer";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
+import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
+import { Skeleton } from "@/components/ui/skeleton";
 import { Spinner } from "@/components/ui/spinner";
 import {
 	Table,
@ -19,9 +35,64 @@ import {
 	TableRow,
 } from "@/components/ui/table";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
-import { DocumentTypeChip, getDocumentTypeIcon } from "./DocumentTypeIcon";
+import { documentsApiService } from "@/lib/apis/documents-api.service";
+import { DocumentTypeChip } from "./DocumentTypeIcon";
 import { RowActions } from "./RowActions";
-import type { ColumnVisibility, Document } from "./types";
+import type { ColumnVisibility, Document, DocumentStatus } from "./types";
+
+// Status indicator component for document processing status.
+// Renders a centered icon with an explanatory tooltip; a missing status is
+// treated as "ready". An unrecognised state renders nothing.
+function StatusIndicator({ status }: { status?: DocumentStatus }) {
+	const state = status?.state ?? "ready";
+
+	let icon: React.ReactNode;
+	let label: React.ReactNode;
+
+	switch (state) {
+		case "pending":
+			icon = <Clock className="h-5 w-5 text-muted-foreground/60" />;
+			label = "Pending - waiting to be synced";
+			break;
+		case "processing":
+			icon = <Spinner size="sm" className="text-primary" />;
+			label = "Syncing";
+			break;
+		case "failed":
+			icon = <AlertCircle className="h-5 w-5 text-destructive" />;
+			// Prefer the recorded failure reason when one is present
+			label = status?.reason || "Processing failed";
+			break;
+		case "ready":
+			icon = <CheckCircle2 className="h-5 w-5 text-muted-foreground/60" />;
+			label = "Ready";
+			break;
+		default:
+			// Explicitly render nothing rather than implicitly returning undefined
+			return null;
+	}
+
+	return (
+		<Tooltip>
+			<TooltipTrigger asChild>
+				<div className="flex items-center justify-center">{icon}</div>
+			</TooltipTrigger>
+			{/* Failure reasons can be long, so only that tooltip is width-capped */}
+			<TooltipContent side="top" className={state === "failed" ? "max-w-xs" : undefined}>
+				{label}
+			</TooltipContent>
+		</Tooltip>
+	);
+}

 export type SortKey = keyof Pick<Document, "title" | "document_type" | "created_at">;

@ -36,57 +107,215 @@ function sortDocuments(docs: Document[], key: SortKey, desc: boolean): Document[
 	return desc ? sorted.reverse() : sorted;
 }

-function truncate(text: string, len = 150): string {
-	const plain = text
-		.replace(/[#*_`>\-[\]()]+/g, " ")
-		.replace(/\s+/g, " ")
-		.trim();
-	if (plain.length <= len) return plain;
-	return `${plain.slice(0, len)}...`;
+/**
+ * Format a timestamp as a relative phrase with a suffix (via date-fns
+ * formatDistanceToNow with addSuffix).
+ *
+ * Returns the input string unchanged when it cannot be parsed as a date.
+ */
+function formatRelativeDate(dateStr: string): string {
+	const date = new Date(dateStr);
+	// formatDistanceToNow throws on an invalid date, which would break the render
+	if (Number.isNaN(date.getTime())) return dateStr;
+	return formatDistanceToNow(date, { addSuffix: true });
+}
+
+/**
+ * Format a timestamp as an absolute en-US date (long month) with a
+ * 24-hour, two-digit hour and minute.
+ */
+function formatAbsoluteDate(dateStr: string): string {
+	const options: Intl.DateTimeFormatOptions = {
+		year: "numeric",
+		month: "long",
+		day: "numeric",
+		hour: "2-digit",
+		minute: "2-digit",
+		hour12: false,
+	};
+	return new Date(dateStr).toLocaleString("en-US", options);
+}
+
+/**
+ * Render `text` in a span and, when the span's content overflows its width,
+ * wrap it in a tooltip that shows the full text.
+ *
+ * Truncation itself comes from the caller-supplied `className`.
+ */
+function TruncatedText({ text, className }: { text: string; className?: string }) {
+	const textRef = useRef<HTMLSpanElement>(null);
+	const [isTruncated, setIsTruncated] = useState(false);
+
+	// Measure overflow on mount, whenever the text changes, and on window resize.
+	useEffect(() => {
+		const checkTruncation = () => {
+			if (textRef.current) {
+				setIsTruncated(textRef.current.scrollWidth > textRef.current.clientWidth);
+			}
+		};
+		checkTruncation();
+		window.addEventListener("resize", checkTruncation);
+		return () => window.removeEventListener("resize", checkTruncation);
+	}, [text]);
+
+	if (isTruncated) {
+		return (
+			<Tooltip>
+				<TooltipTrigger asChild>
+					<span ref={textRef} className={className}>
+						{text}
+					</span>
+				</TooltipTrigger>
+				<TooltipContent side="top" className="max-w-xs">
+					<p className="break-words">{text}</p>
+				</TooltipContent>
+			</Tooltip>
+		);
+	}
+
+	return (
+		<span ref={textRef} className={className}>
+			{text}
+		</span>
+	);
+}
+
+/**
+ * Column header button that requests sorting by `sortKey` when clicked.
+ *
+ * Shows an optional leading icon, the header content, and a chevron: fully
+ * visible for the active sort column (down when descending, up otherwise)
+ * and faded in on hover for inactive columns.
+ */
+function SortableHeader({
+	children,
+	sortKey,
+	currentSortKey,
+	sortDesc,
+	onSort,
+	icon,
+}: {
+	children: React.ReactNode;
+	sortKey: SortKey;
+	currentSortKey: SortKey;
+	sortDesc: boolean;
+	onSort: (key: SortKey) => void;
+	icon?: React.ReactNode;
+}) {
+	const isActive = currentSortKey === sortKey;
+	const showDescending = isActive && sortDesc;
+	const indicatorVisibility = isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50";
+	const handleClick = () => onSort(sortKey);
+
+	return (
+		<button
+			type="button"
+			onClick={handleClick}
+			className="flex items-center gap-1.5 text-left text-sm font-medium text-muted-foreground/70 hover:text-muted-foreground transition-colors group"
+		>
+			{icon && <span className="opacity-60">{icon}</span>}
+			{children}
+			<span className={`transition-opacity ${indicatorVisibility}`}>
+				{showDescending ? <ChevronDown size={14} /> : <ChevronUp size={14} />}
+			</span>
+		</button>
+	);
 }

 export function DocumentsTableShell({
 	documents,
 	loading,
 	error,
-	onRefresh,
 	selectedIds,
 	setSelectedIds,
 	columnVisibility,
-	deleteDocument,
 	sortKey,
 	sortDesc,
 	onSortChange,
+	deleteDocument,
+	searchSpaceId,
 }: {
 	documents: Document[];
 	loading: boolean;
 	error: boolean;
-	onRefresh: () => Promise<void>;
 	selectedIds: Set<number>;
 	setSelectedIds: (update: Set<number>) => void;
 	columnVisibility: ColumnVisibility;
-	deleteDocument: (id: number) => Promise<boolean>;
 	sortKey: SortKey;
 	sortDesc: boolean;
 	onSortChange: (key: SortKey) => void;
+	deleteDocument: (id: number) => Promise<boolean>;
+	searchSpaceId: string;
 }) {
 	const t = useTranslations("documents");
-	const params = useParams();
-	const searchSpaceId = params.search_space_id;
 	const { openDialog } = useDocumentUploadDialog();

+	// State for metadata viewer (opened via Ctrl/Cmd+Click)
+	// Real-time documents don't sync metadata - we fetch on-demand when viewing
+	const [metadataDoc, setMetadataDoc] = useState<Document | null>(null);
+	const [metadataContent, setMetadataContent] = useState<any>(null);
+	const [metadataLoading, setMetadataLoading] = useState(false);
+
+	// State for lazy document content viewer
+	// Real-time documents don't sync content - we fetch on-demand when viewing
+	const [viewingDoc, setViewingDoc] = useState<Document | null>(null);
+	const [viewingContent, setViewingContent] = useState<string>("");
+	const [viewingLoading, setViewingLoading] = useState(false);
+
+	// Fetch document metadata on-demand when metadata viewer is opened
+	const handleViewMetadata = useCallback(async (doc: Document) => {
+		setMetadataDoc(doc);
+
+		// If metadata is already available (from API/search), use it directly
+		if (doc.document_metadata) {
+			setMetadataContent(doc.document_metadata);
+			return;
+		}
+
+		// Otherwise, fetch from API (lazy loading for real-time synced documents)
+		setMetadataLoading(true);
+		try {
+			const fullDoc = await documentsApiService.getDocument({ id: doc.id });
+			setMetadataContent(fullDoc.document_metadata);
+		} catch (err) {
+			console.error("[DocumentsTableShell] Failed to fetch document metadata:", err);
+			setMetadataContent(null);
+		} finally {
+			setMetadataLoading(false);
+		}
+	}, []);
+
+	// Close metadata viewer
+	const handleCloseMetadata = useCallback(() => {
+		setMetadataDoc(null);
+		setMetadataContent(null);
+		setMetadataLoading(false);
+	}, []);
+
+	// Fetch document content on-demand when viewer is opened
+	const handleViewDocument = useCallback(async (doc: Document) => {
+		setViewingDoc(doc);
+
+		// If content is already available (from API/search), use it directly
+		if (doc.content) {
+			setViewingContent(doc.content);
+			return;
+		}
+
+		// Otherwise, fetch from API (lazy loading for real-time synced documents)
+		setViewingLoading(true);
+		try {
+			const fullDoc = await documentsApiService.getDocument({ id: doc.id });
+			setViewingContent(fullDoc.content);
+		} catch (err) {
+			console.error("[DocumentsTableShell] Failed to fetch document content:", err);
+			setViewingContent("Failed to load document content.");
+		} finally {
+			setViewingLoading(false);
+		}
+	}, []);
+
+	// Close document viewer
+	const handleCloseViewer = useCallback(() => {
+		setViewingDoc(null);
+		setViewingContent("");
+		setViewingLoading(false);
+	}, []);
+
 	const sorted = React.useMemo(
 		() => sortDocuments(documents, sortKey, sortDesc),
 		[documents, sortKey, sortDesc]
 	);

-	const allSelectedOnPage = sorted.length > 0 && sorted.every((d) => selectedIds.has(d.id));
-	const someSelectedOnPage = sorted.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;
+	// Helper: check if document can be selected (not processing/pending)
+	const isSelectable = (doc: Document) => {
+		const state = doc.status?.state;
+		return state !== "pending" && state !== "processing";
+	};
+
+	// Only consider selectable documents for "select all" logic
+	const selectableDocs = sorted.filter(isSelectable);
+	const allSelectedOnPage =
+		selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id));
+	const someSelectedOnPage =
+		selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;

 	const toggleAll = (checked: boolean) => {
 		const next = new Set(selectedIds);
 		if (checked)
-			sorted.forEach((d) => {
+			// Only select documents that are not processing/pending
+			selectableDocs.forEach((d) => {
 				next.add(d.id);
 			});
 		else
@ -107,39 +336,139 @@ export function DocumentsTableShell({

 	return (
 		<motion.div
-			className="rounded-md border mt-6 overflow-hidden"
+			className="rounded-lg border border-border/40 bg-background overflow-hidden"
 			initial={{ opacity: 0, y: 20 }}
 			animate={{ opacity: 1, y: 0 }}
 			transition={{ type: "spring", stiffness: 300, damping: 30, delay: 0.2 }}
 		>
 			{loading ? (
-				<div className="flex h-[400px] w-full items-center justify-center">
-					<div className="flex flex-col items-center gap-2">
-						<Spinner size="lg" className="text-primary" />
-						<p className="text-sm text-muted-foreground">{t("loading")}</p>
+				<>
+					{/* Desktop Skeleton View */}
+					<div className="hidden md:flex md:flex-col">
+						<Table className="table-fixed w-full">
+							<TableHeader>
+								<TableRow className="hover:bg-transparent border-b border-border/40">
+									<TableHead className="w-8 px-0 text-center">
+										<div className="flex items-center justify-center h-full">
+											<Skeleton className="h-4 w-4 rounded" />
+										</div>
+									</TableHead>
+									<TableHead className="w-[35%] max-w-0 border-r border-border/40">
+										<Skeleton className="h-3 w-20" />
+									</TableHead>
+									{columnVisibility.document_type && (
+										<TableHead className="w-[20%] min-w-[120px] max-w-[200px] border-r border-border/40">
+											<Skeleton className="h-3 w-14" />
+										</TableHead>
+									)}
+									{columnVisibility.created_by && (
+										<TableHead className="w-36 border-r border-border/40">
+											<Skeleton className="h-3 w-10" />
+										</TableHead>
+									)}
+									{columnVisibility.created_at && (
+										<TableHead className="w-32 border-r border-border/40">
+											<Skeleton className="h-3 w-16" />
+										</TableHead>
+									)}
+									{columnVisibility.status && (
+										<TableHead className="w-20 text-center">
+											<Skeleton className="h-3 w-12 mx-auto" />
+										</TableHead>
+									)}
+									<TableHead className="w-10">
+										<span className="sr-only">Actions</span>
+									</TableHead>
+								</TableRow>
+							</TableHeader>
+						</Table>
+						<div className="h-[50vh] overflow-auto">
+							<Table className="table-fixed w-full">
+								<TableBody>
+									{[65, 80, 45, 72, 55, 88, 40, 60, 50, 75].map((widthPercent, index) => (
+										<TableRow
+											key={`skeleton-${index}`}
+											className="border-b border-border/40 hover:bg-transparent"
+										>
+											<TableCell className="w-8 px-0 py-2.5 text-center">
+												<div className="flex items-center justify-center h-full">
+													<Skeleton className="h-4 w-4 rounded" />
+												</div>
+											</TableCell>
+											<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40">
+												<Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
+											</TableCell>
+											{columnVisibility.document_type && (
+												<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden">
+													<Skeleton className="h-5 w-24 rounded" />
+												</TableCell>
+											)}
+											{columnVisibility.created_by && (
+												<TableCell className="w-36 py-2.5 truncate border-r border-border/40">
+													<Skeleton className="h-4 w-20" />
+												</TableCell>
+											)}
+											{columnVisibility.created_at && (
+												<TableCell className="w-32 py-2.5 border-r border-border/40">
+													<Skeleton className="h-4 w-20" />
+												</TableCell>
+											)}
+											{columnVisibility.status && (
+												<TableCell className="w-20 py-2.5 text-center">
+													<Skeleton className="h-5 w-5 mx-auto rounded-full" />
+												</TableCell>
+											)}
+											<TableCell className="w-10 py-2.5 text-center">
+												<Skeleton className="h-6 w-6 mx-auto rounded" />
+											</TableCell>
+										</TableRow>
+									))}
+								</TableBody>
+							</Table>
+						</div>
 					</div>
-				</div>
+					{/* Mobile Skeleton View */}
+					<div className="md:hidden divide-y divide-border/30 h-[50vh] overflow-auto">
+						{[70, 85, 55, 78, 62, 90].map((widthPercent, index) => (
+							<div key={`skeleton-mobile-${index}`} className="px-4 py-3">
+								<div className="flex items-start gap-3">
+									<Skeleton className="h-4 w-4 mt-0.5 rounded" />
+									<div className="flex-1 min-w-0 space-y-2">
+										<Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
+										<div className="flex flex-wrap items-center gap-2">
+											<Skeleton className="h-5 w-20 rounded" />
+											{columnVisibility.created_by && <Skeleton className="h-3 w-14" />}
+											{columnVisibility.created_at && <Skeleton className="h-3 w-20" />}
+										</div>
+									</div>
+									<div className="flex items-center gap-2">
+										{columnVisibility.status && <Skeleton className="h-5 w-5 rounded-full" />}
+										<Skeleton className="h-7 w-7 rounded" />
+									</div>
+								</div>
+							</div>
+						))}
+					</div>
+				</>
 			) : error ? (
-				<div className="flex h-[400px] w-full items-center justify-center">
-					<div className="flex flex-col items-center gap-2">
+				<div className="flex h-[50vh] w-full items-center justify-center">
+					<div className="flex flex-col items-center gap-3">
+						<AlertCircle className="h-8 w-8 text-destructive/60" />
 						<p className="text-sm text-destructive">{t("error_loading")}</p>
-						<Button variant="outline" size="sm" onClick={() => onRefresh()} className="mt-2">
-							{t("retry")}
-						</Button>
 					</div>
 				</div>
 			) : sorted.length === 0 ? (
-				<div className="flex h-[400px] w-full items-center justify-center">
+				<div className="flex h-[50vh] w-full items-center justify-center">
 					<motion.div
 						initial={{ opacity: 0, y: 20 }}
 						animate={{ opacity: 1, y: 0 }}
 						transition={{ duration: 0.4 }}
 						className="flex flex-col items-center gap-4 max-w-md px-4 text-center"
 					>
-						<div className="rounded-full bg-muted p-4">
-							<FileX className="h-8 w-8 text-muted-foreground" />
+						<div className="rounded-full bg-muted/50 p-4">
+							<FileX className="h-8 w-8 text-muted-foreground/60" />
 						</div>
-						<div className="space-y-2">
+						<div className="space-y-1.5">
 							<h3 className="text-lg font-semibold">{t("no_documents")}</h3>
 							<p className="text-sm text-muted-foreground">
 								Get started by uploading your first document.
@ -153,234 +482,301 @@ export function DocumentsTableShell({
 				</div>
 			) : (
 				<>
-					<div className="hidden md:block max-h-[60vh] overflow-auto">
+					{/* Desktop Table View - Notion Style */}
+					<div className="hidden md:flex md:flex-col">
+						{/* Fixed Header */}
 						<Table className="table-fixed w-full">
-							<TableHeader className="sticky top-0 bg-background">
-								<TableRow className="hover:bg-transparent">
-									<TableHead style={{ width: 28 }}>
-										<Checkbox
-											checked={allSelectedOnPage || (someSelectedOnPage && "indeterminate")}
-											onCheckedChange={(v) => toggleAll(!!v)}
-											aria-label="Select all"
-										/>
+							<TableHeader>
+								<TableRow className="hover:bg-transparent border-b border-border/40">
+									<TableHead className="w-8 px-0 text-center">
+										<div className="flex items-center justify-center h-full">
+											<Checkbox
+												checked={allSelectedOnPage || (someSelectedOnPage && "indeterminate")}
+												onCheckedChange={(v) => toggleAll(!!v)}
+												aria-label="Select all"
+												className="border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+											/>
+										</div>
+									</TableHead>
+									<TableHead className="w-[35%] border-r border-border/40">
+										<SortableHeader
+											sortKey="title"
+											currentSortKey={sortKey}
+											sortDesc={sortDesc}
+											onSort={onSortHeader}
+											icon={<FileText size={14} className="text-muted-foreground" />}
+										>
+											Document
+										</SortableHeader>
 									</TableHead>
-									{columnVisibility.title && (
-										<TableHead style={{ width: 250 }}>
-											<Button
-												variant="ghost"
-												className="flex h-full w-full cursor-pointer select-none items-center justify-between gap-2"
-												onClick={() => onSortHeader("title")}
-											>
-												{t("title")}
-												{sortKey === "title" ? (
-													sortDesc ? (
-														<ChevronDown className="shrink-0 opacity-60" size={16} />
-													) : (
-														<ChevronUp className="shrink-0 opacity-60" size={16} />
-													)
-												) : null}
-											</Button>
-										</TableHead>
-									)}
 									{columnVisibility.document_type && (
-										<TableHead style={{ width: 180 }}>
-											<Button
-												variant="ghost"
-												className="flex h-full w-full cursor-pointer select-none items-center justify-between gap-2"
-												onClick={() => onSortHeader("document_type")}
+										<TableHead className="w-[20%] min-w-[120px] max-w-[200px] border-r border-border/40">
+											<SortableHeader
+												sortKey="document_type"
+												currentSortKey={sortKey}
+												sortDesc={sortDesc}
+												onSort={onSortHeader}
+												icon={<Network size={14} className="text-muted-foreground" />}
 											>
-												{t("type")}
-												{sortKey === "document_type" ? (
-													sortDesc ? (
-														<ChevronDown className="shrink-0 opacity-60" size={16} />
-													) : (
-														<ChevronUp className="shrink-0 opacity-60" size={16} />
-													)
-												) : null}
-											</Button>
+												Source
+											</SortableHeader>
 										</TableHead>
 									)}
-									{columnVisibility.content && (
-										<TableHead style={{ width: 300 }}>{t("content_summary")}</TableHead>
+									{columnVisibility.created_by && (
+										<TableHead className="w-36 border-r border-border/40">
+											<span className="flex items-center gap-1.5 text-sm font-medium text-muted-foreground/70">
+												<User size={14} className="opacity-60 text-muted-foreground" />
+												User
+											</span>
+										</TableHead>
 									)}
 									{columnVisibility.created_at && (
-										<TableHead style={{ width: 120 }}>
-											<Button
-												variant="ghost"
-												className="flex h-full w-full cursor-pointer select-none items-center justify-between gap-2"
-												onClick={() => onSortHeader("created_at")}
+										<TableHead className="w-32 border-r border-border/40">
+											<SortableHeader
+												sortKey="created_at"
+												currentSortKey={sortKey}
+												sortDesc={sortDesc}
+												onSort={onSortHeader}
+												icon={<Calendar size={14} className="text-muted-foreground" />}
 											>
-												Created At
-												{sortKey === "created_at" ? (
-													sortDesc ? (
-														<ChevronDown className="shrink-0 opacity-60" size={16} />
-													) : (
-														<ChevronUp className="shrink-0 opacity-60" size={16} />
-													)
-												) : null}
-											</Button>
+												Created
+											</SortableHeader>
 										</TableHead>
 									)}
-									<TableHead style={{ width: 60 }}>
+									{columnVisibility.status && (
+										<TableHead className="w-20 text-center">
+											<span className="text-sm font-medium text-muted-foreground/70">Status</span>
+										</TableHead>
+									)}
+									<TableHead className="w-10">
 										<span className="sr-only">Actions</span>
 									</TableHead>
 								</TableRow>
 							</TableHeader>
-							<TableBody>
-								{sorted.map((doc, index) => {
-									const icon = getDocumentTypeIcon(doc.document_type);
-									const title = doc.title;
-									const truncatedTitle = title.length > 30 ? `${title.slice(0, 30)}...` : title;
-									return (
-										<motion.tr
-											key={doc.id}
-											initial={{ opacity: 0, y: 10 }}
-											animate={{
-												opacity: 1,
-												y: 0,
-												transition: {
-													type: "spring",
-													stiffness: 300,
-													damping: 30,
-													delay: index * 0.03,
-												},
-											}}
-											exit={{ opacity: 0, y: -10 }}
-											className="border-b transition-colors hover:bg-muted/50"
-										>
-											<TableCell className="px-4 py-3">
-												<Checkbox
-													checked={selectedIds.has(doc.id)}
-													onCheckedChange={(v) => toggleOne(doc.id, !!v)}
-													aria-label="Select row"
-												/>
-											</TableCell>
-											{columnVisibility.title && (
-												<TableCell className="px-4 py-3">
-													<motion.div
-														className="flex items-center gap-2 font-medium"
-														whileHover={{ scale: 1.02 }}
-														transition={{ type: "spring", stiffness: 300 }}
-														style={{ display: "flex" }}
+						</Table>
+						{/* Scrollable Body */}
+						<div className="h-[50vh] overflow-auto">
+							<Table className="table-fixed w-full">
+								<TableBody>
+									{sorted.map((doc, index) => {
+										const title = doc.title;
+										const isSelected = selectedIds.has(doc.id);
+										const canSelect = isSelectable(doc);
+										return (
+											<motion.tr
+												key={doc.id}
+												initial={{ opacity: 0 }}
+												animate={{
+													opacity: 1,
+													transition: {
+														duration: 0.2,
+														delay: index * 0.02,
+													},
+												}}
+												className={`border-b border-border/40 transition-colors ${
+													isSelected ? "bg-primary/5 hover:bg-primary/8" : "hover:bg-muted/30"
+												}`}
+											>
+												<TableCell className="w-8 px-0 py-2.5 text-center">
+													<div className="flex items-center justify-center h-full">
+														<Checkbox
+															checked={isSelected}
+															onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)}
+															disabled={!canSelect}
+															aria-label={
+																canSelect ? "Select row" : "Cannot select while processing"
+															}
+															className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`}
+														/>
+													</div>
+												</TableCell>
+												<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40">
+													<button
+														type="button"
+														className="block w-full text-left text-sm text-foreground hover:text-foreground transition-colors cursor-pointer bg-transparent border-0 p-0 truncate"
+														onClick={(e) => {
+															// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
+															if (e.ctrlKey || e.metaKey) {
+																e.preventDefault();
+																e.stopPropagation();
+																handleViewMetadata(doc);
+															} else {
+																// Normal click opens document viewer (lazy loads content)
+																handleViewDocument(doc);
+															}
+														}}
+														onKeyDown={(e) => {
+															if (e.key !== "Enter") return;
+															// <button> also fires click on Enter; cancel it so onClick does not run too
+															e.preventDefault();
+															if (e.ctrlKey || e.metaKey) {
+																handleViewMetadata(doc); // Ctrl/Cmd + Enter opens metadata
+															} else {
+																handleViewDocument(doc); // plain Enter opens the document viewer
+															}
+														}}
 													>
+														<TruncatedText text={title} className="truncate block" />
+													</button>
+												</TableCell>
+												{columnVisibility.document_type && (
+													<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden">
+														<DocumentTypeChip type={doc.document_type} />
+													</TableCell>
+												)}
+												{columnVisibility.created_by && (
+													<TableCell className="w-36 py-2.5 text-sm text-foreground truncate border-r border-border/40">
+														{doc.created_by_name || "—"}
+													</TableCell>
+												)}
+												{columnVisibility.created_at && (
+													<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40">
 														<Tooltip>
 															<TooltipTrigger asChild>
-																<span className="flex items-center gap-2">
-																	<span className="text-muted-foreground shrink-0">{icon}</span>
-																	<span>{truncatedTitle}</span>
+																<span className="cursor-default">
+																	{formatRelativeDate(doc.created_at)}
 																</span>
 															</TooltipTrigger>
-															<TooltipContent>
-																<p>{title}</p>
+															<TooltipContent side="top">
+																{formatAbsoluteDate(doc.created_at)}
 															</TooltipContent>
 														</Tooltip>
-													</motion.div>
+													</TableCell>
+												)}
+												{columnVisibility.status && (
+													<TableCell className="w-20 py-2.5 text-center">
+														<StatusIndicator status={doc.status} />
+													</TableCell>
+												)}
+												<TableCell className="w-10 py-2.5 text-center">
+													<RowActions
+														document={doc}
+														deleteDocument={deleteDocument}
+														searchSpaceId={searchSpaceId}
+													/>
 												</TableCell>
-											)}
-											{columnVisibility.document_type && (
-												<TableCell className="px-4 py-3">
-													<div className="flex items-center gap-2">
-														<DocumentTypeChip type={doc.document_type} />
-													</div>
-												</TableCell>
-											)}
-											{columnVisibility.content && (
-												<TableCell className="px-4 py-3">
-													<div className="flex flex-col gap-2">
-														<div className="max-w-[300px] max-h-[60px] overflow-hidden text-sm text-muted-foreground">
-															{truncate(doc.content)}
-														</div>
-														<DocumentViewer
-															title={doc.title}
-															content={doc.content}
-															trigger={
-																<Button variant="ghost" size="sm" className="w-fit text-xs">
-																	{t("view_full")}
-																</Button>
-															}
-														/>
-													</div>
-												</TableCell>
-											)}
-											{columnVisibility.created_at && (
-												<TableCell className="px-4 py-3">
-													{new Date(doc.created_at).toLocaleDateString()}
-												</TableCell>
-											)}
-											<TableCell className="px-4 py-3">
-												<RowActions
-													document={doc}
-													deleteDocument={deleteDocument}
-													refreshDocuments={async () => {
-														await onRefresh();
-													}}
-													searchSpaceId={searchSpaceId as string}
-												/>
-											</TableCell>
-										</motion.tr>
-									);
-								})}
-							</TableBody>
-						</Table>
+											</motion.tr>
+										);
+									})}
+								</TableBody>
+							</Table>
+						</div>
 					</div>
-					<div className="md:hidden divide-y">
-						{sorted.map((doc) => {
-							const icon = getDocumentTypeIcon(doc.document_type);
+
+					{/* Mobile Card View - Notion Style */}
+					<div className="md:hidden divide-y divide-border/40 h-[50vh] overflow-auto">
+						{sorted.map((doc, index) => {
+							const isSelected = selectedIds.has(doc.id);
+							const canSelect = isSelectable(doc);
 							return (
-								<div key={doc.id} className="p-3">
+								<motion.div
+									key={doc.id}
+									initial={{ opacity: 0 }}
+									animate={{ opacity: 1, transition: { delay: index * 0.03 } }}
+									className={`px-4 py-3 transition-colors ${
+										isSelected ? "bg-primary/5" : "hover:bg-muted/20"
+									}`}
+								>
 									<div className="flex items-center gap-3">
 										<Checkbox
-											checked={selectedIds.has(doc.id)}
-											onCheckedChange={(v) => toggleOne(doc.id, !!v)}
-											aria-label="Select row"
+											checked={isSelected}
+											onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)}
+											disabled={!canSelect}
+											aria-label={canSelect ? "Select row" : "Cannot select while processing"}
+											className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`}
 										/>
-										<div className="flex-1 min-w-0">
-											<div className="flex items-center gap-2 min-w-0">
-												<span className="text-muted-foreground shrink-0">{icon}</span>
-												<div className="font-medium truncate">{doc.title}</div>
-											</div>
-											<div className="mt-1 flex flex-wrap items-center gap-2">
+										<div className="flex-1 min-w-0 space-y-1.5">
+											<button
+												type="button"
+												className="text-left text-sm text-foreground hover:text-foreground transition-colors cursor-pointer truncate block w-full bg-transparent border-0 p-0"
+												onClick={(e) => {
+													// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
+													if (e.ctrlKey || e.metaKey) {
+														e.preventDefault();
+														e.stopPropagation();
+														handleViewMetadata(doc);
+													} else {
+														// Normal click opens document viewer (lazy loads content)
+														handleViewDocument(doc);
+													}
+												}}
+												onKeyDown={(e) => {
+													if (e.key !== "Enter") return;
+													// <button> also fires click on Enter; cancel it so onClick does not run too
+													e.preventDefault();
+													if (e.ctrlKey || e.metaKey) {
+														handleViewMetadata(doc); // Ctrl/Cmd + Enter opens metadata
+													} else {
+														handleViewDocument(doc); // plain Enter opens the document viewer
+													}
+												}}
+											>
+												{doc.title}
+											</button>
+											<div className="flex flex-wrap items-center gap-2">
 												<DocumentTypeChip type={doc.document_type} />
-												<span className="text-xs text-muted-foreground">
-													{new Date(doc.created_at).toLocaleDateString()}
-												</span>
+												{columnVisibility.created_by && doc.created_by_name && (
+													<span className="text-xs text-foreground">{doc.created_by_name}</span>
+												)}
+												{columnVisibility.created_at && (
+													<Tooltip>
+														<TooltipTrigger asChild>
+															<span className="text-xs text-foreground cursor-default">
+																{formatRelativeDate(doc.created_at)}
+															</span>
+														</TooltipTrigger>
+														<TooltipContent side="top">
+															{formatAbsoluteDate(doc.created_at)}
+														</TooltipContent>
+													</Tooltip>
+												)}
 											</div>
-											{columnVisibility.content && (
-												<div className="mt-2 text-sm text-muted-foreground">
-													{truncate(doc.content)}
-													<div className="mt-1">
-														<DocumentViewer
-															title={doc.title}
-															content={doc.content}
-															trigger={
-																<Button
-																	variant="ghost"
-																	size="sm"
-																	className="w-fit text-xs p-0 h-auto"
-																>
-																	{t("view_full")}
-																</Button>
-															}
-														/>
-													</div>
-												</div>
-											)}
 										</div>
-										<RowActions
-											document={doc}
-											deleteDocument={deleteDocument}
-											refreshDocuments={async () => {
-												await onRefresh();
-											}}
-											searchSpaceId={searchSpaceId as string}
-										/>
+										<div className="flex items-center gap-2">
+											{columnVisibility.status && <StatusIndicator status={doc.status} />}
+											<RowActions
+												document={doc}
+												deleteDocument={deleteDocument}
+												searchSpaceId={searchSpaceId}
+											/>
+										</div>
 									</div>
-								</div>
+								</motion.div>
 							);
 						})}
 					</div>
 				</>
 			)}
+
+			{/* Metadata Viewer - opened via Ctrl/Cmd+Click on document title */}
+			{/* Lazy loads metadata from API for real-time synced documents */}
+			<JsonMetadataViewer
+				title={metadataDoc?.title ?? ""}
+				metadata={metadataContent}
+				loading={metadataLoading}
+				open={!!metadataDoc}
+				onOpenChange={(open) => {
+					if (!open) handleCloseMetadata();
+				}}
+			/>
+
+			{/* Document Content Viewer - lazy loads content on-demand */}
+			<Dialog open={!!viewingDoc} onOpenChange={(open) => !open && handleCloseViewer()}>
+				<DialogContent className="max-w-4xl max-h-[80vh] overflow-y-auto">
+					<DialogHeader>
+						<DialogTitle>{viewingDoc?.title}</DialogTitle>
+					</DialogHeader>
+					<div className="mt-4">
+						{viewingLoading ? (
+							<div className="flex items-center justify-center py-12">
+								<Loader2 className="h-8 w-8 animate-spin text-muted-foreground" />
+							</div>
+						) : (
+							<MarkdownViewer content={viewingContent} />
+						)}
+					</div>
+				</DialogContent>
+			</Dialog>
 		</motion.div>
 	);
 }
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/PaginationControls.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/PaginationControls.tsx
@ -2,164 +2,97 @@

 import { ChevronFirst, ChevronLast, ChevronLeft, ChevronRight } from "lucide-react";
 import { motion } from "motion/react";
-import { useTranslations } from "next-intl";
 import { Button } from "@/components/ui/button";
-import { Label } from "@/components/ui/label";
-import { Pagination, PaginationContent, PaginationItem } from "@/components/ui/pagination";
-import {
-	Select,
-	SelectContent,
-	SelectItem,
-	SelectTrigger,
-	SelectValue,
-} from "@/components/ui/select";
+
+// Fixed number of rows per page (replaces the removed page-size selector)
+const PAGE_SIZE = 50;

+/**
+ * Pagination footer: shows the visible row range ("start-end of total") and
+ * first/previous/next/last buttons. The page size is fixed at PAGE_SIZE.
+ *
+ * pageIndex is zero-based; canPrev/canNext disable the backward/forward buttons.
+ */
 export function PaginationControls({
 	pageIndex,
-	pageSize,
 	total,
-	onPageSizeChange,
 	onFirst,
 	onPrev,
 	onNext,
 	onLast,
 	canPrev,
 	canNext,
-	id,
 }: {
 	pageIndex: number;
-	pageSize: number;
 	total: number;
-	onPageSizeChange: (size: number) => void;
 	onFirst: () => void;
 	onPrev: () => void;
 	onNext: () => void;
 	onLast: () => void;
 	canPrev: boolean;
 	canNext: boolean;
-	id: string;
 }) {
-	const t = useTranslations("documents");
-	const start = total === 0 ? 0 : pageIndex * pageSize + 1;
-	const end = Math.min((pageIndex + 1) * pageSize, total);
+	// With no rows, report "0-0 of 0" instead of the misleading "1-0 of 0"
+	const start = total === 0 ? 0 : pageIndex * PAGE_SIZE + 1;
+	const end = Math.min((pageIndex + 1) * PAGE_SIZE, total);

 	return (
-		<div className="flex items-center justify-between gap-8 mt-6">
-			<motion.div
-				className="flex items-center gap-3"
-				initial={{ opacity: 0, x: -20 }}
-				animate={{ opacity: 1, x: 0 }}
-				transition={{ type: "spring", stiffness: 300, damping: 30 }}
-			>
-				<Label htmlFor={id} className="max-sm:sr-only">
-					{t("rows_per_page")}
-				</Label>
-				<Select value={String(pageSize)} onValueChange={(v) => onPageSizeChange(Number(v))}>
-					<SelectTrigger id={id} className="w-fit whitespace-nowrap">
-						<SelectValue placeholder="Select number of results" />
-					</SelectTrigger>
-					<SelectContent>
-						{[5, 10, 25, 50].map((s) => (
-							<SelectItem key={s} value={String(s)}>
-								{s}
-							</SelectItem>
-						))}
-					</SelectContent>
-				</Select>
-			</motion.div>
+		<motion.div
+			className="flex items-center justify-end gap-3 py-3 px-2"
+			initial={{ opacity: 0, y: 10 }}
+			animate={{ opacity: 1, y: 0 }}
+			transition={{ type: "spring", stiffness: 300, damping: 30, delay: 0.3 }}
+		>
+			{/* Range indicator */}
+			<span className="text-sm text-muted-foreground tabular-nums">
+				{start}-{end} of {total}
+			</span>

-			<motion.div
-				className="flex grow justify-end whitespace-nowrap text-sm text-muted-foreground"
-				initial={{ opacity: 0 }}
-				animate={{ opacity: 1 }}
-				transition={{ delay: 0.2 }}
-			>
-				<p className="whitespace-nowrap text-sm text-muted-foreground" aria-live="polite">
-					<span className="text-foreground">
-						{start}-{end}
-					</span>{" "}
-					of <span className="text-foreground">{total}</span>
-				</p>
-			</motion.div>
-
-			<div>
-				<Pagination>
-					<PaginationContent>
-						<PaginationItem>
-							<motion.div
-								whileHover={{ scale: 1.05 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									size="icon"
-									variant="outline"
-									className="disabled:pointer-events-none disabled:opacity-50"
-									onClick={onFirst}
-									disabled={!canPrev}
-									aria-label="Go to first page"
-								>
-									<ChevronFirst size={16} strokeWidth={2} aria-hidden="true" />
-								</Button>
-							</motion.div>
-						</PaginationItem>
-						<PaginationItem>
-							<motion.div
-								whileHover={{ scale: 1.05 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									size="icon"
-									variant="outline"
-									className="disabled:pointer-events-none disabled:opacity-50"
-									onClick={onPrev}
-									disabled={!canPrev}
-									aria-label="Go to previous page"
-								>
-									<ChevronLeft size={16} strokeWidth={2} aria-hidden="true" />
-								</Button>
-							</motion.div>
-						</PaginationItem>
-						<PaginationItem>
-							<motion.div
-								whileHover={{ scale: 1.05 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									size="icon"
-									variant="outline"
-									className="disabled:pointer-events-none disabled:opacity-50"
-									onClick={onNext}
-									disabled={!canNext}
-									aria-label="Go to next page"
-								>
-									<ChevronRight size={16} strokeWidth={2} aria-hidden="true" />
-								</Button>
-							</motion.div>
-						</PaginationItem>
-						<PaginationItem>
-							<motion.div
-								whileHover={{ scale: 1.05 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									size="icon"
-									variant="outline"
-									className="disabled:pointer-events-none disabled:opacity-50"
-									onClick={onLast}
-									disabled={!canNext}
-									aria-label="Go to last page"
-								>
-									<ChevronLast size={16} strokeWidth={2} aria-hidden="true" />
-								</Button>
-							</motion.div>
-						</PaginationItem>
-					</PaginationContent>
-				</Pagination>
+			{/* Navigation buttons */}
+			<div className="flex items-center gap-1">
+				<Button
+					variant="ghost"
+					size="icon"
+					className="h-8 w-8 disabled:opacity-40"
+					onClick={onFirst}
+					disabled={!canPrev}
+					aria-label="Go to first page"
+				>
+					<ChevronFirst size={18} strokeWidth={2} />
+				</Button>
+				<Button
+					variant="ghost"
+					size="icon"
+					className="h-8 w-8 disabled:opacity-40"
+					onClick={onPrev}
+					disabled={!canPrev}
+					aria-label="Go to previous page"
+				>
+					<ChevronLeft size={18} strokeWidth={2} />
+				</Button>
+				<Button
+					variant="ghost"
+					size="icon"
+					className="h-8 w-8 disabled:opacity-40"
+					onClick={onNext}
+					disabled={!canNext}
+					aria-label="Go to next page"
+				>
+					<ChevronRight size={18} strokeWidth={2} />
+				</Button>
+				<Button
+					variant="ghost"
+					size="icon"
+					className="h-8 w-8 disabled:opacity-40"
+					onClick={onLast}
+					disabled={!canNext}
+					aria-label="Go to last page"
+				>
+					<ChevronLast size={18} strokeWidth={2} />
+				</Button>
 			</div>
-		</div>
+		</motion.div>
 	);
 }
+
+export { PAGE_SIZE };
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@ -1,11 +1,9 @@
 "use client";

-import { FileText, MoreHorizontal, Pencil, Trash2 } from "lucide-react";
-import { motion } from "motion/react";
+import { MoreHorizontal, Pencil, Trash2 } from "lucide-react";
 import { useRouter } from "next/navigation";
 import { useState } from "react";
 import { toast } from "sonner";
-import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
 import {
 	AlertDialog,
 	AlertDialogAction,
@ -22,7 +20,6 @@ import {
 	DropdownMenuItem,
 	DropdownMenuTrigger,
 } from "@/components/ui/dropdown-menu";
-import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import type { Document } from "./types";

 // Only FILE and NOTE document types can be edited
@ -34,16 +31,13 @@ const NON_DELETABLE_DOCUMENT_TYPES = ["SURFSENSE_DOCS"] as const;
 export function RowActions({
 	document,
 	deleteDocument,
-	refreshDocuments,
 	searchSpaceId,
 }: {
 	document: Document;
 	deleteDocument: (id: number) => Promise<boolean>;
-	refreshDocuments: () => Promise<void>;
 	searchSpaceId: string;
 }) {
 	const [isDeleteOpen, setIsDeleteOpen] = useState(false);
-	const [isMetadataOpen, setIsMetadataOpen] = useState(false);
 	const [isDeleting, setIsDeleting] = useState(false);
 	const router = useRouter();

@ -51,20 +45,37 @@ export function RowActions({
 		document.document_type as (typeof EDITABLE_DOCUMENT_TYPES)[number]
 	);

-	const isDeletable = !NON_DELETABLE_DOCUMENT_TYPES.includes(
+	// Documents in "pending" or "processing" state should show disabled delete
+	const isBeingProcessed =
+		document.status?.state === "pending" || document.status?.state === "processing";
+
+	// SURFSENSE_DOCS are system-managed and should not show delete at all
+	const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes(
 		document.document_type as (typeof NON_DELETABLE_DOCUMENT_TYPES)[number]
 	);

+	// Edit and Delete are disabled while processing
+	const isEditDisabled = isBeingProcessed;
+	const isDeleteDisabled = isBeingProcessed;
+
 	const handleDelete = async () => {
 		setIsDeleting(true);
 		try {
 			const ok = await deleteDocument(document.id);
-			if (ok) toast.success("Document deleted successfully");
-			else toast.error("Failed to delete document");
-			await refreshDocuments();
-		} catch (error) {
+			if (!ok) toast.error("Failed to delete document");
+			// Note: No success toast here; the deleteDocument callback supplied by the page shows it
+			// (the mutation atom no longer toasts). The mutation updates the cache, so no refresh is needed
+		} catch (error: unknown) {
 			console.error("Error deleting document:", error);
-			toast.error("Failed to delete document");
+			// Check for 409 Conflict (document started processing after UI loaded)
+			const status =
+				(error as { response?: { status?: number } })?.response?.status ??
+				(error as { status?: number })?.status;
+			if (status === 409) {
+				toast.error("Document is now being processed. Please try again later.");
+			} else {
+				toast.error("Failed to delete document");
+			}
 		} finally {
 			setIsDeleting(false);
 			setIsDeleteOpen(false);
@ -76,124 +87,121 @@ export function RowActions({
 	};

 	return (
-		<div className="flex items-center justify-end gap-1">
+		<>
 			{/* Desktop Actions */}
-			<div className="hidden md:flex items-center gap-1">
-				{isEditable && (
-					<Tooltip>
-						<TooltipTrigger asChild>
-							<motion.div
-								whileHover={{ scale: 1.1 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									variant="ghost"
-									size="icon"
-									className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"
-									onClick={handleEdit}
-								>
-									<Pencil className="h-4 w-4" />
-									<span className="sr-only">Edit Document</span>
-								</Button>
-							</motion.div>
-						</TooltipTrigger>
-						<TooltipContent side="top">
-							<p>Edit Document</p>
-						</TooltipContent>
-					</Tooltip>
-				)}
-
-				<Tooltip>
-					<TooltipTrigger asChild>
-						<motion.div
-							whileHover={{ scale: 1.1 }}
-							whileTap={{ scale: 0.95 }}
-							transition={{ type: "spring", stiffness: 400, damping: 17 }}
-						>
+			<div className="hidden md:inline-flex items-center justify-center">
+				{isEditable ? (
+					// Editable documents: show 3-dot dropdown with edit + delete
+					<DropdownMenu>
+						<DropdownMenuTrigger asChild>
 							<Button
 								variant="ghost"
 								size="icon"
 								className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"
-								onClick={() => setIsMetadataOpen(true)}
 							>
-								<FileText className="h-4 w-4" />
-								<span className="sr-only">View Metadata</span>
+								<MoreHorizontal className="h-4 w-4" />
+								<span className="sr-only">Open menu</span>
 							</Button>
-						</motion.div>
-					</TooltipTrigger>
-					<TooltipContent side="top">
-						<p>View Metadata</p>
-					</TooltipContent>
-				</Tooltip>
-
-				{isDeletable && (
-					<Tooltip>
-						<TooltipTrigger asChild>
-							<motion.div
-								whileHover={{ scale: 1.1 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
+						</DropdownMenuTrigger>
+						<DropdownMenuContent align="end" className="w-40">
+							<DropdownMenuItem
+								onClick={() => !isEditDisabled && handleEdit()}
+								disabled={isEditDisabled}
+								className={
+									isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
+								}
 							>
-								<Button
-									variant="ghost"
-									size="icon"
-									className="h-8 w-8 text-muted-foreground hover:text-destructive hover:bg-destructive/10"
-									onClick={() => setIsDeleteOpen(true)}
-									disabled={isDeleting}
+								<Pencil className="mr-2 h-4 w-4" />
+								<span>Edit</span>
+							</DropdownMenuItem>
+							{shouldShowDelete && (
+								<DropdownMenuItem
+									onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
+									disabled={isDeleteDisabled}
+									className={
+										isDeleteDisabled
+											? "text-muted-foreground cursor-not-allowed opacity-50"
+											: "text-destructive focus:text-destructive"
+									}
 								>
-									<Trash2 className="h-4 w-4" />
-									<span className="sr-only">Delete</span>
-								</Button>
-							</motion.div>
-						</TooltipTrigger>
-						<TooltipContent side="top">
-							<p>Delete</p>
-						</TooltipContent>
-					</Tooltip>
+									<Trash2 className="mr-2 h-4 w-4" />
+									<span>Delete</span>
+								</DropdownMenuItem>
+							)}
+						</DropdownMenuContent>
+					</DropdownMenu>
+				) : (
+					// Non-editable documents: show only delete button directly
+					shouldShowDelete && (
+						<Button
+							variant="ghost"
+							size="icon"
+							className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
+							onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
+							disabled={isDeleting || isDeleteDisabled}
+						>
+							<Trash2 className="h-4 w-4" />
+							<span className="sr-only">Delete</span>
+						</Button>
+					)
 				)}
 			</div>

 			{/* Mobile Actions Dropdown */}
-			<div className="flex md:hidden">
-				<DropdownMenu>
-					<DropdownMenuTrigger asChild>
-						<Button variant="ghost" size="icon" className="h-8 w-8 text-muted-foreground">
-							<MoreHorizontal className="h-4 w-4" />
-							<span className="sr-only">Open menu</span>
-						</Button>
-					</DropdownMenuTrigger>
-					<DropdownMenuContent align="end" className="w-40">
-						{isEditable && (
-							<DropdownMenuItem onClick={handleEdit}>
+			<div className="inline-flex md:hidden items-center justify-center">
+				{isEditable ? (
+					// Editable documents: show 3-dot dropdown
+					<DropdownMenu>
+						<DropdownMenuTrigger asChild>
+							<Button variant="ghost" size="icon" className="h-8 w-8 text-muted-foreground">
+								<MoreHorizontal className="h-4 w-4" />
+								<span className="sr-only">Open menu</span>
+							</Button>
+						</DropdownMenuTrigger>
+						<DropdownMenuContent align="end" className="w-40">
+							<DropdownMenuItem
+								onClick={() => !isEditDisabled && handleEdit()}
+								disabled={isEditDisabled}
+								className={
+									isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
+								}
+							>
 								<Pencil className="mr-2 h-4 w-4" />
 								<span>Edit</span>
 							</DropdownMenuItem>
-						)}
-						<DropdownMenuItem onClick={() => setIsMetadataOpen(true)}>
-							<FileText className="mr-2 h-4 w-4" />
-							<span>Metadata</span>
-						</DropdownMenuItem>
-						{isDeletable && (
-							<DropdownMenuItem
-								onClick={() => setIsDeleteOpen(true)}
-								className="text-destructive focus:text-destructive"
-							>
-								<Trash2 className="mr-2 h-4 w-4" />
-								<span>Delete</span>
-							</DropdownMenuItem>
-						)}
-					</DropdownMenuContent>
-				</DropdownMenu>
+							{shouldShowDelete && (
+								<DropdownMenuItem
+									onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
+									disabled={isDeleteDisabled}
+									className={
+										isDeleteDisabled
+											? "text-muted-foreground cursor-not-allowed opacity-50"
+											: "text-destructive focus:text-destructive"
+									}
+								>
+									<Trash2 className="mr-2 h-4 w-4" />
+									<span>Delete</span>
+								</DropdownMenuItem>
+							)}
+						</DropdownMenuContent>
+					</DropdownMenu>
+				) : (
+					// Non-editable documents: show only delete button directly
+					shouldShowDelete && (
+						<Button
+							variant="ghost"
+							size="icon"
+							className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
+							onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
+							disabled={isDeleting || isDeleteDisabled}
+						>
+							<Trash2 className="h-4 w-4" />
+							<span className="sr-only">Delete</span>
+						</Button>
+					)
+				)}
 			</div>

-			<JsonMetadataViewer
-				title={document.title}
-				metadata={document.document_metadata}
-				open={isMetadataOpen}
-				onOpenChange={setIsMetadataOpen}
-			/>
-
 			<AlertDialog open={isDeleteOpen} onOpenChange={setIsDeleteOpen}>
 				<AlertDialogContent>
 					<AlertDialogHeader>
@ -214,6 +222,6 @@ export function RowActions({
 					</AlertDialogFooter>
 				</AlertDialogContent>
 			</AlertDialog>
-		</div>
+		</>
 	);
 }
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
@ -1,18 +1,27 @@
 export type DocumentType = string;

+export type DocumentStatus = {
+	state: "ready" | "pending" | "processing" | "failed";
+	reason?: string;
+};
+
 export type Document = {
 	id: number;
 	title: string;
 	document_type: DocumentType;
-	document_metadata: any;
-	content: string;
+	// Optional: Only needed when viewing document details (lazy loaded)
+	document_metadata?: any;
+	content?: string;
 	created_at: string;
 	search_space_id: number;
+	created_by_id?: string | null;
+	created_by_name?: string | null;
+	status?: DocumentStatus;
 };

 export type ColumnVisibility = {
-	title: boolean;
 	document_type: boolean;
-	content: boolean;
+	created_by: boolean;
 	created_at: boolean;
+	status: boolean;
 };
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@ -2,22 +2,19 @@

 import { useQuery } from "@tanstack/react-query";
 import { useAtomValue } from "jotai";
-import { RefreshCw, SquarePlus, Upload } from "lucide-react";
 import { motion } from "motion/react";
-import { useParams, useRouter } from "next/navigation";
+import { useParams } from "next/navigation";
 import { useTranslations } from "next-intl";
-import { useCallback, useEffect, useId, useMemo, useState } from "react";
+import { useCallback, useEffect, useMemo, useState } from "react";
 import { toast } from "sonner";
 import { deleteDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms";
-import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms";
-import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
-import { Button } from "@/components/ui/button";
 import type { DocumentTypeEnum } from "@/contracts/types/document.types";
+import { useDocuments } from "@/hooks/use-documents";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { cacheKeys } from "@/lib/query-client/cache-keys";
 import { DocumentsFilters } from "./components/DocumentsFilters";
 import { DocumentsTableShell, type SortKey } from "./components/DocumentsTableShell";
-import { PaginationControls } from "./components/PaginationControls";
+import { PAGE_SIZE, PaginationControls } from "./components/PaginationControls";
 import type { ColumnVisibility } from "./components/types";

 function useDebounced<T>(value: T, delay = 250) {
@ -31,70 +28,48 @@ function useDebounced<T>(value: T, delay = 250) {

 export default function DocumentsTable() {
 	const t = useTranslations("documents");
-	const id = useId();
 	const params = useParams();
-	const router = useRouter();
 	const searchSpaceId = Number(params.search_space_id);
-	const { openDialog: openUploadDialog } = useDocumentUploadDialog();
-
-	const handleNewNote = useCallback(() => {
-		router.push(`/dashboard/${searchSpaceId}/editor/new`);
-	}, [router, searchSpaceId]);

 	const [search, setSearch] = useState("");
 	const debouncedSearch = useDebounced(search, 250);
 	const [activeTypes, setActiveTypes] = useState<DocumentTypeEnum[]>([]);
 	const [columnVisibility, setColumnVisibility] = useState<ColumnVisibility>({
-		title: true,
 		document_type: true,
-		content: true,
+		created_by: true,
 		created_at: true,
+		status: true,
 	});
 	const [pageIndex, setPageIndex] = useState(0);
-	const [pageSize, setPageSize] = useState(50);
-	const [sortKey, setSortKey] = useState<SortKey>("title");
-	const [sortDesc, setSortDesc] = useState(false);
+	const [sortKey, setSortKey] = useState<SortKey>("created_at");
+	const [sortDesc, setSortDesc] = useState(true);
 	const [selectedIds, setSelectedIds] = useState<Set<number>>(new Set());
-	const { data: rawTypeCounts } = useAtomValue(documentTypeCountsAtom);
 	const { mutateAsync: deleteDocumentMutation } = useAtomValue(deleteDocumentMutationAtom);

-	// Build query parameters for fetching documents
-	const queryParams = useMemo(
-		() => ({
-			search_space_id: searchSpaceId,
-			page: pageIndex,
-			page_size: pageSize,
-			...(activeTypes.length > 0 && { document_types: activeTypes }),
-		}),
-		[searchSpaceId, pageIndex, pageSize, activeTypes]
-	);
+	// REAL-TIME: Use Electric SQL hook for live document updates (when not searching)
+	const {
+		documents: realtimeDocuments,
+		typeCounts: realtimeTypeCounts,
+		loading: realtimeLoading,
+		error: realtimeError,
+	} = useDocuments(searchSpaceId, activeTypes);

-	// Build search query parameters
+	// Check if we're in search mode
+	const isSearchMode = !!debouncedSearch.trim();
+
+	// Build search query parameters (only used when searching)
 	const searchQueryParams = useMemo(
 		() => ({
 			search_space_id: searchSpaceId,
 			page: pageIndex,
-			page_size: pageSize,
+			page_size: PAGE_SIZE,
 			title: debouncedSearch.trim(),
 			...(activeTypes.length > 0 && { document_types: activeTypes }),
 		}),
-		[searchSpaceId, pageIndex, pageSize, activeTypes, debouncedSearch]
+		[searchSpaceId, pageIndex, activeTypes, debouncedSearch]
 	);

-	// Use query for fetching documents
-	const {
-		data: documentsResponse,
-		isLoading: isDocumentsLoading,
-		refetch: refetchDocuments,
-		error: documentsError,
-	} = useQuery({
-		queryKey: cacheKeys.documents.globalQueryParams(queryParams),
-		queryFn: () => documentsApiService.getDocuments({ queryParams }),
-		staleTime: 3 * 60 * 1000, // 3 minutes
-		enabled: !!searchSpaceId && !debouncedSearch.trim(),
-	});
-
-	// Use query for searching documents
+	// API search query (only enabled when searching - Electric doesn't do full-text search)
 	const {
 		data: searchResponse,
 		isLoading: isSearchLoading,
@ -103,134 +78,135 @@ export default function DocumentsTable() {
 	} = useQuery({
 		queryKey: cacheKeys.documents.globalQueryParams(searchQueryParams),
 		queryFn: () => documentsApiService.searchDocuments({ queryParams: searchQueryParams }),
-		staleTime: 3 * 60 * 1000, // 3 minutes
-		enabled: !!searchSpaceId && !!debouncedSearch.trim(),
+		staleTime: 30 * 1000, // 30 seconds for search (shorter since it's on-demand)
+		enabled: !!searchSpaceId && isSearchMode,
 	});

-	// Determine if we should show SurfSense docs (when no type filter or SURFSENSE_DOCS is selected)
-	const showSurfsenseDocs =
-		activeTypes.length === 0 || activeTypes.includes("SURFSENSE_DOCS" as DocumentTypeEnum);
+	// Client-side sorting for real-time documents
+	const sortedRealtimeDocuments = useMemo(() => {
+		const docs = [...realtimeDocuments];
+		docs.sort((a, b) => {
+			const av = a[sortKey] ?? "";
+			const bv = b[sortKey] ?? "";
+			let cmp: number;
+			if (sortKey === "created_at") {
+				cmp = new Date(av as string).getTime() - new Date(bv as string).getTime();
+			} else {
+				cmp = String(av).localeCompare(String(bv));
+			}
+			return sortDesc ? -cmp : cmp;
+		});
+		return docs;
+	}, [realtimeDocuments, sortKey, sortDesc]);

-	// Use query for fetching SurfSense docs
-	const {
-		data: surfsenseDocsResponse,
-		isLoading: isSurfsenseDocsLoading,
-		refetch: refetchSurfsenseDocs,
-	} = useQuery({
-		queryKey: ["surfsense-docs", debouncedSearch, pageIndex, pageSize],
-		queryFn: () =>
-			documentsApiService.getSurfsenseDocs({
-				queryParams: {
-					page: pageIndex,
-					page_size: pageSize,
-					title: debouncedSearch.trim() || undefined,
-				},
-			}),
-		staleTime: 3 * 60 * 1000, // 3 minutes
-		enabled: showSurfsenseDocs,
-	});
+	// Client-side pagination for real-time documents
+	const paginatedRealtimeDocuments = useMemo(() => {
+		const start = pageIndex * PAGE_SIZE;
+		const end = start + PAGE_SIZE;
+		return sortedRealtimeDocuments.slice(start, end);
+	}, [sortedRealtimeDocuments, pageIndex]);

-	// Transform SurfSense docs to match the Document type
-	const surfsenseDocsAsDocuments: Document[] = useMemo(() => {
-		if (!surfsenseDocsResponse?.items) return [];
-		return surfsenseDocsResponse.items.map((doc) => ({
-			id: doc.id,
-			title: doc.title,
-			document_type: "SURFSENSE_DOCS",
-			document_metadata: { source: doc.source },
-			content: doc.content,
-			created_at: new Date().toISOString(),
-			search_space_id: -1, // Special value for global docs
-		}));
-	}, [surfsenseDocsResponse]);
+	// Determine what to display based on search mode
+	const displayDocs = isSearchMode
+		? (searchResponse?.items || []).map((item) => ({
+				id: item.id,
+				search_space_id: item.search_space_id,
+				document_type: item.document_type,
+				title: item.title,
+				created_by_id: item.created_by_id ?? null,
+				created_by_name: item.created_by_name ?? null,
+				created_at: item.created_at,
+				status: (
+					item as {
+						status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string };
+					}
+				).status ?? { state: "ready" as const },
+			}))
+		: paginatedRealtimeDocuments;

-	// Merge type counts with SURFSENSE_DOCS count
-	const typeCounts = useMemo(() => {
-		const counts = { ...(rawTypeCounts || {}) };
-		if (surfsenseDocsResponse?.total) {
-			counts.SURFSENSE_DOCS = surfsenseDocsResponse.total;
-		}
-		return counts;
-	}, [rawTypeCounts, surfsenseDocsResponse?.total]);
+	const displayTotal = isSearchMode ? searchResponse?.total || 0 : sortedRealtimeDocuments.length;

-	// Extract documents and total based on search state
-	const documents = debouncedSearch.trim()
-		? searchResponse?.items || []
-		: documentsResponse?.items || [];
-	const total = debouncedSearch.trim() ? searchResponse?.total || 0 : documentsResponse?.total || 0;
+	const loading = isSearchMode ? isSearchLoading : realtimeLoading;
+	const error = isSearchMode ? searchError : realtimeError;

-	const loading = debouncedSearch.trim() ? isSearchLoading : isDocumentsLoading;
-	const error = debouncedSearch.trim() ? searchError : documentsError;
-
-	// Display results directly
-	const displayDocs = documents;
-	const displayTotal = total;
-	const pageStart = pageIndex * pageSize;
-	const pageEnd = Math.min(pageStart + pageSize, displayTotal);
+	const pageEnd = Math.min((pageIndex + 1) * PAGE_SIZE, displayTotal);

 	const onToggleType = (type: DocumentTypeEnum, checked: boolean) => {
-		setActiveTypes((prev) => (checked ? [...prev, type] : prev.filter((t) => t !== type)));
+		setActiveTypes((prev) => {
+			if (checked) {
+				return prev.includes(type) ? prev : [...prev, type];
+			} else {
+				return prev.filter((t) => t !== type);
+			}
+		});
 		setPageIndex(0);
 	};

-	const onToggleColumn = (id: keyof ColumnVisibility, checked: boolean) => {
-		setColumnVisibility((prev) => ({ ...prev, [id]: checked }));
-	};
-
-	const [isRefreshing, setIsRefreshing] = useState(false);
-
-	const refreshCurrentView = useCallback(async () => {
-		if (isRefreshing) return;
-		setIsRefreshing(true);
-		try {
-			if (debouncedSearch.trim()) {
-				await refetchSearch();
-			} else {
-				await refetchDocuments();
-			}
-			toast.success(t("refresh_success") || "Documents refreshed");
-		} finally {
-			setIsRefreshing(false);
-		}
-	}, [debouncedSearch, refetchSearch, refetchDocuments, t, isRefreshing]);
-
-	// Create a delete function for single document deletion
-	const deleteDocument = useCallback(
-		async (id: number) => {
-			try {
-				await deleteDocumentMutation({ id });
-				return true;
-			} catch (error) {
-				console.error("Failed to delete document:", error);
-				return false;
-			}
-		},
-		[deleteDocumentMutation]
-	);
-
 	const onBulkDelete = async () => {
 		if (selectedIds.size === 0) {
 			toast.error(t("no_rows_selected"));
 			return;
 		}
+
+		// Filter out pending/processing documents - they cannot be deleted
+		// For real-time mode, use sortedRealtimeDocuments (which has status)
+		// For search mode, use searchResponse items (need to safely access status)
+		const allDocs = isSearchMode
+			? (searchResponse?.items || []).map((item) => ({
+					id: item.id,
+					status: (item as { status?: { state: string } }).status,
+				}))
+			: sortedRealtimeDocuments.map((doc) => ({ id: doc.id, status: doc.status }));
+
+		const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id));
+		const deletableIds = selectedDocs
+			.filter((doc) => doc.status?.state !== "pending" && doc.status?.state !== "processing")
+			.map((doc) => doc.id);
+		const inProgressCount = selectedIds.size - deletableIds.length;
+
+		if (inProgressCount > 0) {
+			toast.warning(
+				`${inProgressCount} document(s) are pending or processing and cannot be deleted.`
+			);
+		}
+
+		if (deletableIds.length === 0) {
+			return;
+		}
+
 		try {
 			// Delete documents one by one using the mutation
+			// Track 409 conflicts separately (document started processing after UI loaded)
+			let conflictCount = 0;
 			const results = await Promise.all(
-				Array.from(selectedIds).map(async (id) => {
+				deletableIds.map(async (id) => {
 					try {
 						await deleteDocumentMutation({ id });
 						return true;
-					} catch {
+					} catch (error: unknown) {
+						const status =
+							(error as { response?: { status?: number } })?.response?.status ??
+							(error as { status?: number })?.status;
+						if (status === 409) conflictCount++;
 						return false;
 					}
 				})
 			);
 			const okCount = results.filter((r) => r === true).length;
-			if (okCount === selectedIds.size)
+			if (okCount === deletableIds.length) {
 				toast.success(t("delete_success_count", { count: okCount }));
-			else toast.error(t("delete_partial_failed"));
-			// Refetch the current page with appropriate method
-			await refreshCurrentView();
+			} else if (conflictCount > 0) {
+				toast.error(`${conflictCount} document(s) started processing. Please try again later.`);
+			} else {
+				toast.error(t("delete_partial_failed"));
+			}
+
+			// If in search mode, refetch search results to reflect deletion
+			if (isSearchMode) {
+				await refetchSearch();
+			}
+			// Real-time mode: Electric will sync the deletion automatically
+
 			setSelectedIds(new Set());
 		} catch (e) {
 			console.error(e);
@ -238,10 +214,47 @@ export default function DocumentsTable() {
 		}
 	};

+	// Single document delete handler for RowActions: resolves true on success, false on any error (logged, not rethrown)
+	const handleDeleteDocument = useCallback(
+		async (id: number): Promise<boolean> => {
+			try {
+				await deleteDocumentMutation({ id });
+				toast.success(t("delete_success") || "Document deleted");
+				// If in search mode, refetch search results to reflect deletion
+				if (isSearchMode) {
+					await refetchSearch();
+				}
+				// Real-time mode: Electric will sync the deletion automatically
+				return true;
+			} catch (e) {
+				console.error("Error deleting document:", e);
+				return false;
+			}
+		},
+		[deleteDocumentMutation, isSearchMode, refetchSearch, t]
+	);
+
+	// Sort handler: re-selecting the active column flips the direction, while picking a
+	// different column switches to it in ascending order. Both setters are called side by
+	// side because updater functions must stay pure (React Strict Mode invokes them twice).
+	const handleSortChange = useCallback(
+		(key: SortKey) => {
+			setSortDesc((desc) => (key === sortKey ? !desc : false));
+			setSortKey(key);
+		},
+		[sortKey]
+	);
+
+	// Reset page when search changes (type filter already resets via onToggleType)
+	// biome-ignore lint/correctness/useExhaustiveDependencies: Intentionally reset page on search change
+	useEffect(() => {
+		setPageIndex(0);
+	}, [debouncedSearch]);
+
 	useEffect(() => {
 		const mq = window.matchMedia("(max-width: 768px)");
 		const apply = (isSmall: boolean) => {
-			setColumnVisibility((prev) => ({ ...prev, content: !isSmall, created_at: !isSmall }));
+			setColumnVisibility((prev) => ({ ...prev, created_by: !isSmall, created_at: !isSmall }));
 		};
 		apply(mq.matches);
 		const onChange = (e: MediaQueryListEvent) => apply(e.matches);
@ -254,81 +267,44 @@ export default function DocumentsTable() {
 			initial={{ opacity: 0, y: 20 }}
 			animate={{ opacity: 1, y: 0 }}
 			transition={{ duration: 0.3 }}
-			className="w-full px-6 py-4 space-y-6 min-h-[calc(100vh-64px)]"
+			className="w-full max-w-7xl mx-auto px-6 pt-17 pb-6 space-y-6 min-h-[calc(100vh-64px)]"
 		>
-			<motion.div
-				className="flex items-center justify-between"
-				initial={{ opacity: 0, y: 10 }}
-				animate={{ opacity: 1, y: 0 }}
-				transition={{ delay: 0.1 }}
-			>
-				<div>
-					<h2 className="text-xl md:text-2xl font-bold tracking-tight">{t("title")}</h2>
-					<p className="text-xs md:text-sm text-muted-foreground">{t("subtitle")}</p>
-				</div>
-				<div className="flex items-center gap-2">
-					<Button onClick={openUploadDialog} variant="default" size="sm">
-						<Upload className="w-4 h-4 mr-2" />
-						{t("upload_documents")}
-					</Button>
-					<Button onClick={handleNewNote} variant="outline" size="sm">
-						<SquarePlus className="w-4 h-4 mr-2" />
-						{t("create_shared_note")}
-					</Button>
-					<Button onClick={refreshCurrentView} variant="outline" size="sm" disabled={isRefreshing}>
-						<RefreshCw className={`w-4 h-4 mr-2 ${isRefreshing ? "animate-spin" : ""}`} />
-						{t("refresh")}
-					</Button>
-				</div>
-			</motion.div>
-
+			{/* Filters - use real-time type counts */}
 			<DocumentsFilters
-				typeCounts={rawTypeCounts ?? {}}
+				typeCounts={realtimeTypeCounts}
 				selectedIds={selectedIds}
 				onSearch={setSearch}
 				searchValue={search}
 				onBulkDelete={onBulkDelete}
 				onToggleType={onToggleType}
 				activeTypes={activeTypes}
-				columnVisibility={columnVisibility}
-				onToggleColumn={onToggleColumn}
 			/>

+			{/* Table */}
 			<DocumentsTableShell
 				documents={displayDocs}
 				loading={!!loading}
 				error={!!error}
-				onRefresh={refreshCurrentView}
 				selectedIds={selectedIds}
 				setSelectedIds={setSelectedIds}
 				columnVisibility={columnVisibility}
-				deleteDocument={deleteDocument}
 				sortKey={sortKey}
 				sortDesc={sortDesc}
-				onSortChange={(key) => {
-					if (sortKey === key) setSortDesc((v) => !v);
-					else {
-						setSortKey(key);
-						setSortDesc(false);
-					}
-				}}
+				onSortChange={handleSortChange}
+				deleteDocument={handleDeleteDocument}
+				searchSpaceId={String(searchSpaceId)}
 			/>

+			{/* Pagination */}
 			<PaginationControls
 				pageIndex={pageIndex}
-				pageSize={pageSize}
 				total={displayTotal}
-				onPageSizeChange={(s) => {
-					setPageSize(s);
-					setPageIndex(0);
-				}}
 				onFirst={() => setPageIndex(0)}
 				onPrev={() => setPageIndex((i) => Math.max(0, i - 1))}
 				onNext={() => setPageIndex((i) => (pageEnd < displayTotal ? i + 1 : i))}
-				onLast={() => setPageIndex(Math.max(0, Math.ceil(displayTotal / pageSize) - 1))}
+				onLast={() => setPageIndex(Math.max(0, Math.ceil(displayTotal / PAGE_SIZE) - 1))}
 				canPrev={pageIndex > 0}
 				canNext={pageEnd < displayTotal}
-				id={id}
 			/>
 		</motion.div>
 	);
--- a/surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts
+++ b/surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts
@ -0,0 +1,4 @@
+import { atom } from "jotai";
+
+// Atom to control the connector dialog open state from anywhere in the app (initial value: false, i.e. closed)
+export const connectorDialogOpenAtom = atom(false);
--- a/surfsense_web/atoms/connectors/connector-mutation.atoms.ts
+++ b/surfsense_web/atoms/connectors/connector-mutation.atoms.ts
@ -1,5 +1,4 @@
 import { atomWithMutation } from "jotai-tanstack-query";
-import { toast } from "sonner";
 import type {
 	CreateConnectorRequest,
 	DeleteConnectorRequest,
@ -17,15 +16,16 @@ export const createConnectorMutationAtom = atomWithMutation((get) => {
 	const searchSpaceId = get(activeSearchSpaceIdAtom);

 	return {
-		mutationKey: cacheKeys.connectors.all(searchSpaceId!),
+		mutationKey: cacheKeys.connectors.all(searchSpaceId ?? ""),
 		enabled: !!searchSpaceId,
 		mutationFn: async (request: CreateConnectorRequest) => {
 			return connectorsApiService.createConnector(request);
 		},

 		onSuccess: () => {
+			if (!searchSpaceId) return;
 			queryClient.invalidateQueries({
-				queryKey: cacheKeys.connectors.all(searchSpaceId!),
+				queryKey: cacheKeys.connectors.all(searchSpaceId),
 			});
 		},
 	};
@ -35,15 +35,16 @@ export const updateConnectorMutationAtom = atomWithMutation((get) => {
 	const searchSpaceId = get(activeSearchSpaceIdAtom);

 	return {
-		mutationKey: cacheKeys.connectors.all(searchSpaceId!),
+		mutationKey: cacheKeys.connectors.all(searchSpaceId ?? ""),
 		enabled: !!searchSpaceId,
 		mutationFn: async (request: UpdateConnectorRequest) => {
 			return connectorsApiService.updateConnector(request);
 		},

 		onSuccess: (_, request: UpdateConnectorRequest) => {
+			if (!searchSpaceId) return;
 			queryClient.invalidateQueries({
-				queryKey: cacheKeys.connectors.all(searchSpaceId!),
+				queryKey: cacheKeys.connectors.all(searchSpaceId),
 			});
 			queryClient.invalidateQueries({
 				queryKey: cacheKeys.connectors.byId(String(request.id)),
@ -56,15 +57,16 @@ export const deleteConnectorMutationAtom = atomWithMutation((get) => {
 	const searchSpaceId = get(activeSearchSpaceIdAtom);

 	return {
-		mutationKey: cacheKeys.connectors.all(searchSpaceId!),
+		mutationKey: cacheKeys.connectors.all(searchSpaceId ?? ""),
 		enabled: !!searchSpaceId,
 		mutationFn: async (request: DeleteConnectorRequest) => {
 			return connectorsApiService.deleteConnector(request);
 		},

 		onSuccess: (_, request: DeleteConnectorRequest) => {
+			if (!searchSpaceId) return;
 			queryClient.setQueryData(
-				cacheKeys.connectors.all(searchSpaceId!),
+				cacheKeys.connectors.all(searchSpaceId),
 				(oldData: GetConnectorsResponse | undefined) => {
 					if (!oldData) return oldData;
 					return oldData.filter((connector) => connector.id !== request.id);
@ -88,9 +90,9 @@ export const indexConnectorMutationAtom = atomWithMutation((get) => {
 		},

 		onSuccess: (response: IndexConnectorResponse) => {
-			toast.success(response.message);
+			if (!searchSpaceId) return;
 			queryClient.invalidateQueries({
-				queryKey: cacheKeys.connectors.all(searchSpaceId!),
+				queryKey: cacheKeys.connectors.all(searchSpaceId),
 			});
 			queryClient.invalidateQueries({
 				queryKey: cacheKeys.connectors.byId(String(response.connector_id)),
--- a/surfsense_web/atoms/documents/document-mutation.atoms.ts
+++ b/surfsense_web/atoms/documents/document-mutation.atoms.ts
@ -48,7 +48,7 @@ export const uploadDocumentMutationAtom = atomWithMutation((get) => {
 		},

 		onSuccess: () => {
-			toast.success("Files uploaded for processing");
+			// Note: Toast notification is handled by the caller (DocumentUploadTab) to use i18n
 			// Invalidate logs summary to show new processing tasks immediately on documents page
 			queryClient.invalidateQueries({
 				queryKey: cacheKeys.logs.summary(searchSpaceId ?? undefined),
@ -95,7 +95,7 @@ export const deleteDocumentMutationAtom = atomWithMutation((get) => {
 		},

 		onSuccess: (_, request: DeleteDocumentRequest) => {
-			toast.success("Document deleted successfully");
+			// Note: Toast is handled by the caller (page.tsx onBulkDelete) to show count info
 			queryClient.setQueryData(
 				cacheKeys.documents.globalQueryParams(documentsQueryParams),
 				(oldData: GetDocumentsResponse | undefined) => {
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@ -19,7 +19,7 @@ import { Spinner } from "@/components/ui/spinner";
 import { Tabs, TabsContent } from "@/components/ui/tabs";
 import type { SearchSourceConnector } from "@/contracts/types/connector.types";
 import { useConnectorsElectric } from "@/hooks/use-connectors-electric";
-import { useDocumentsElectric } from "@/hooks/use-documents-electric";
+import { useDocuments } from "@/hooks/use-documents";
 import { useInbox } from "@/hooks/use-inbox";
 import { cn } from "@/lib/utils";
 import { ConnectorDialogHeader } from "./connector-popup/components/connector-dialog-header";
@ -37,7 +37,7 @@ import { AllConnectorsTab } from "./connector-popup/tabs/all-connectors-tab";
 import { ConnectorAccountsListView } from "./connector-popup/views/connector-accounts-list-view";
 import { YouTubeCrawlerView } from "./connector-popup/views/youtube-crawler-view";

-export const ConnectorIndicator: FC = () => {
+export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger = false }) => {
 	const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom);
 	const searchParams = useSearchParams();
 	const { data: currentUser } = useAtomValue(currentUserAtom);
@ -63,7 +63,9 @@ export const ConnectorIndicator: FC = () => {
 	const llmConfigLoading = preferencesLoading || globalConfigsLoading;

 	// Fetch document type counts using Electric SQL + PGlite for real-time updates
-	const { documentTypeCounts, loading: documentTypesLoading } = useDocumentsElectric(searchSpaceId);
+	const { typeCounts: documentTypeCounts, loading: documentTypesLoading } = useDocuments(
+		searchSpaceId ? Number(searchSpaceId) : null
+	);

 	// Fetch notifications to detect indexing failures
 	const { inboxItems = [] } = useInbox(
@ -186,34 +188,38 @@ export const ConnectorIndicator: FC = () => {

 	return (
 		<Dialog open={isOpen} onOpenChange={handleOpenChange}>
-			<TooltipIconButton
-				data-joyride="connector-icon"
-				tooltip={hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"}
-				side="bottom"
-				className={cn(
-					"size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative",
-					"hover:bg-muted-foreground/15 dark:hover:bg-muted-foreground/30",
-					"outline-none focus:outline-none focus-visible:outline-none font-semibold text-xs",
-					"border-0 ring-0 focus:ring-0 shadow-none focus:shadow-none"
-				)}
-				aria-label={
-					hasConnectors ? `View ${activeConnectorsCount} connectors` : "Add your first connector"
-				}
-				onClick={() => handleOpenChange(true)}
-			>
-				{isLoading ? (
-					<Spinner size="sm" />
-				) : (
-					<>
-						<Cable className="size-4 stroke-[1.5px]" />
-						{activeConnectorsCount > 0 && (
-							<span className="absolute -top-0.5 right-0 flex items-center justify-center min-w-[16px] h-4 px-1 text-[10px] font-medium rounded-full bg-primary text-primary-foreground shadow-sm">
-								{activeConnectorsCount > 99 ? "99+" : activeConnectorsCount}
-							</span>
-						)}
-					</>
-				)}
-			</TooltipIconButton>
+			{!hideTrigger && (
+				<TooltipIconButton
+					data-joyride="connector-icon"
+					tooltip={
+						hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"
+					}
+					side="bottom"
+					className={cn(
+						"size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative",
+						"hover:bg-muted-foreground/15 dark:hover:bg-muted-foreground/30",
+						"outline-none focus:outline-none focus-visible:outline-none font-semibold text-xs",
+						"border-0 ring-0 focus:ring-0 shadow-none focus:shadow-none"
+					)}
+					aria-label={
+						hasConnectors ? `View ${activeConnectorsCount} connectors` : "Add your first connector"
+					}
+					onClick={() => handleOpenChange(true)}
+				>
+					{isLoading ? (
+						<Spinner size="sm" />
+					) : (
+						<>
+							<Cable className="size-4 stroke-[1.5px]" />
+							{activeConnectorsCount > 0 && (
+								<span className="absolute -top-0.5 right-0 flex items-center justify-center min-w-[16px] h-4 px-1 text-[10px] font-medium rounded-full bg-primary text-primary-foreground shadow-sm">
+									{activeConnectorsCount > 99 ? "99+" : activeConnectorsCount}
+								</span>
+							)}
+						</>
+					)}
+				</TooltipIconButton>
+			)}

 			<DialogContent className="max-w-3xl w-[95vw] sm:w-full h-[75vh] sm:h-[85vh] flex flex-col p-0 gap-0 overflow-hidden border border-border bg-muted text-foreground focus:outline-none focus:ring-0 focus-visible:outline-none focus-visible:ring-0 [&>button]:right-4 sm:[&>button]:right-12 [&>button]:top-6 sm:[&>button]:top-10 [&>button]:opacity-80 hover:[&>button]:opacity-100 [&>button_svg]:size-5">
 				<DialogTitle className="sr-only">Manage Connectors</DialogTitle>
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@ -1,8 +1,9 @@
 import { format } from "date-fns";
-import { useAtomValue } from "jotai";
+import { useAtom, useAtomValue } from "jotai";
 import { useRouter, useSearchParams } from "next/navigation";
 import { useCallback, useEffect, useRef, useState } from "react";
 import { toast } from "sonner";
+import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms";
 import {
 	createConnectorMutationAtom,
 	deleteConnectorMutationAtom,
@ -49,7 +50,8 @@ export const useConnectorDialog = () => {
 	const { mutateAsync: deleteConnector } = useAtomValue(deleteConnectorMutationAtom);
 	const { mutateAsync: createConnector } = useAtomValue(createConnectorMutationAtom);

-	const [isOpen, setIsOpen] = useState(false);
+	// Use global atom for dialog open state so it can be controlled from anywhere
+	const [isOpen, setIsOpen] = useAtom(connectorDialogOpenAtom);
 	const [activeTab, setActiveTab] = useState("all");
 	const [connectingId, setConnectingId] = useState<string | null>(null);
 	const [isScrolled, setIsScrolled] = useState(false);
@ -293,6 +295,7 @@ export const useConnectorDialog = () => {
 		connectingConnectorType,
 		viewingAccountsType,
 		viewingMCPList,
+		setIsOpen,
 	]);

 	// Detect OAuth success / Failure and transition to config view
@ -345,9 +348,10 @@ export const useConnectorDialog = () => {

 						// If we found the connector, find the matching OAuth/Composio connector by type
 						if (newConnector) {
+							const connectorType = newConnector.connector_type;
 							oauthConnector =
-								OAUTH_CONNECTORS.find((c) => c.connectorType === newConnector!.connector_type) ||
-								COMPOSIO_CONNECTORS.find((c) => c.connectorType === newConnector!.connector_type);
+								OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) ||
+								COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType);
 						}
 					}

@ -358,8 +362,9 @@ export const useConnectorDialog = () => {
 							COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);

 						if (oauthConnector) {
+							const oauthConnectorType = oauthConnector.connectorType;
 							newConnector = result.data.find(
-								(c: SearchSourceConnector) => c.connector_type === oauthConnector!.connectorType
+								(c: SearchSourceConnector) => c.connector_type === oauthConnectorType
 							);
 						}
 					}
@ -399,7 +404,7 @@ export const useConnectorDialog = () => {
 			// Invalid query params - log but don't crash
 			console.warn("Invalid connector popup query params in OAuth success handler:", error);
 		}
-	}, [searchParams, searchSpaceId, refetchAllConnectors]);
+	}, [searchParams, searchSpaceId, refetchAllConnectors, setIsOpen]);

 	// Handle OAuth connection
 	const handleConnectOAuth = useCallback(
@ -514,7 +519,7 @@ export const useConnectorDialog = () => {
 		} finally {
 			setConnectingId(null);
 		}
-	}, [searchSpaceId, createConnector, refetchAllConnectors]);
+	}, [searchSpaceId, createConnector, refetchAllConnectors, setIsOpen]);

 	// Handle connecting non-OAuth connectors (like Tavily API)
 	const handleConnectNonOAuth = useCallback(
@ -677,12 +682,8 @@ export const useConnectorDialog = () => {
 								const successMessage =
 									currentConnectorType === "MCP_CONNECTOR"
 										? `${connector.name} added successfully`
-										: `${connectorTitle} connected and indexing started!`;
-								toast.success(successMessage, {
-									description: periodicEnabledForIndexing
-										? `Periodic sync enabled every ${getFrequencyLabel(frequencyMinutesForIndexing)}.`
-										: "You can continue working while we sync your data.",
-								});
+										: `${connectorTitle} connected and syncing started!`;
+								toast.success(successMessage);

 								const url = new URL(window.location.href);
 								url.searchParams.delete("modal");
@ -782,7 +783,6 @@ export const useConnectorDialog = () => {
 			updateConnector,
 			indexConnector,
 			router,
-			getFrequencyLabel,
 		]
 	);

@ -1010,11 +1010,7 @@ export const useConnectorDialog = () => {
 					);
 				}

-				toast.success(`${indexingConfig.connectorTitle} indexing started`, {
-					description: periodicEnabled
-						? `Periodic sync enabled every ${getFrequencyLabel(frequencyMinutes)}.`
-						: "You can continue working while we sync your data.",
-				});
+				toast.success(`${indexingConfig.connectorTitle} indexing started`);

 				// Update URL - the effect will handle closing the modal and clearing state
 				const url = new URL(window.location.href);
@ -1045,7 +1041,6 @@ export const useConnectorDialog = () => {
 			updateConnector,
 			periodicEnabled,
 			frequencyMinutes,
-			getFrequencyLabel,
 			router,
 			indexingConnectorConfig,
 		]
@ -1426,9 +1421,7 @@ export const useConnectorDialog = () => {
 						end_date: endDateStr,
 					},
 				});
-				toast.success("Indexing started", {
-					description: "You can continue working while we sync your data.",
-				});
+				toast.success("Indexing started");

 				// Invalidate queries to refresh data
 				queryClient.invalidateQueries({
@ -1445,7 +1438,7 @@ export const useConnectorDialog = () => {
 				}
 			}
 		},
-		[searchSpaceId, indexConnector, queryClient]
+		[searchSpaceId, indexConnector]
 	);

 	// Handle going back from edit view
@ -1527,7 +1520,7 @@ export const useConnectorDialog = () => {
 				}
 			}
 		},
-		[activeTab, isStartingIndexing, isDisconnecting, isSaving, isCreatingConnector]
+		[activeTab, isStartingIndexing, isDisconnecting, isSaving, isCreatingConnector, setIsOpen]
 	);

 	// Handle tab change
--- a/surfsense_web/components/json-metadata-viewer.tsx
+++ b/surfsense_web/components/json-metadata-viewer.tsx
@ -1,4 +1,4 @@
-import { FileJson } from "lucide-react";
+import { FileJson, Loader2 } from "lucide-react";
 import React from "react";
 import { defaultStyles, JsonView } from "react-json-view-lite";
 import { Button } from "@/components/ui/button";
@ -17,6 +17,7 @@ interface JsonMetadataViewerProps {
 	trigger?: React.ReactNode;
 	open?: boolean;
 	onOpenChange?: (open: boolean) => void;
+	loading?: boolean;
 }

 export function JsonMetadataViewer({
@ -25,6 +26,7 @@ export function JsonMetadataViewer({
 	trigger,
 	open,
 	onOpenChange,
+	loading,
 }: JsonMetadataViewerProps) {
 	// Ensure metadata is a valid object
 	const jsonData = React.useMemo(() => {
@ -54,7 +56,13 @@ export function JsonMetadataViewer({
 						</DialogTitle>
 					</DialogHeader>
 					<div className="mt-2 sm:mt-4 p-2 sm:p-4 bg-muted/30 rounded-md text-xs sm:text-sm">
-						<JsonView data={jsonData} style={defaultStyles} />
+						{loading ? (
+							<div className="flex items-center justify-center py-12">
+								<Loader2 className="h-8 w-8 animate-spin text-muted-foreground" />
+							</div>
+						) : (
+							<JsonView data={jsonData} style={defaultStyles} />
+						)}
 					</div>
 				</DialogContent>
 			</Dialog>
--- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
+++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
@ -90,7 +90,7 @@ export function LayoutDataProvider({
 	});

 	// Fetch threads (40 total to allow up to 20 per section - shared/private)
-	const { data: threadsData } = useQuery({
+	const { data: threadsData, isPending: isLoadingThreads } = useQuery({
 		queryKey: ["threads", searchSpaceId, { limit: 40 }],
 		queryFn: () => fetchThreads(Number(searchSpaceId), 40),
 		enabled: !!searchSpaceId,
@ -585,6 +585,7 @@ export function LayoutDataProvider({
 				theme={theme}
 				setTheme={setTheme}
 				isChatPage={isChatPage}
+				isLoadingChats={isLoadingThreads}
 				inbox={{
 					isOpen: isInboxSidebarOpen,
 					onOpenChange: setIsInboxSidebarOpen,
--- a/surfsense_web/components/layout/ui/shell/LayoutShell.tsx
+++ b/surfsense_web/components/layout/ui/shell/LayoutShell.tsx
@ -74,6 +74,7 @@ interface LayoutShellProps {
 	className?: string;
 	// Inbox props
 	inbox?: InboxProps;
+	isLoadingChats?: boolean;
 }

 export function LayoutShell({
@ -110,6 +111,7 @@ export function LayoutShell({
 	children,
 	className,
 	inbox,
+	isLoadingChats = false,
 }: LayoutShellProps) {
 	const isMobile = useIsMobile();
 	const [mobileMenuOpen, setMobileMenuOpen] = useState(false);
@ -162,6 +164,7 @@ export function LayoutShell({
 							pageUsage={pageUsage}
 							theme={theme}
 							setTheme={setTheme}
+							isLoadingChats={isLoadingChats}
 						/>

 						<main className={cn("flex-1", isChatPage ? "overflow-hidden" : "overflow-auto")}>
@ -232,6 +235,7 @@ export function LayoutShell({
 							theme={theme}
 							setTheme={setTheme}
 							className="hidden md:flex border-r shrink-0"
+							isLoadingChats={isLoadingChats}
 						/>

 						{/* Docked Inbox Sidebar - renders as flex sibling between sidebar and content */}
--- a/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx
@ -37,6 +37,7 @@ interface MobileSidebarProps {
 	pageUsage?: PageUsage;
 	theme?: string;
 	setTheme?: (theme: "light" | "dark" | "system") => void;
+	isLoadingChats?: boolean;
 }

 export function MobileSidebarTrigger({ onClick }: { onClick: () => void }) {
@ -78,6 +79,7 @@ export function MobileSidebar({
 	pageUsage,
 	theme,
 	setTheme,
+	isLoadingChats = false,
 }: MobileSidebarProps) {
 	const handleSearchSpaceSelect = (id: number) => {
 		onSearchSpaceSelect(id);
@ -158,6 +160,7 @@ export function MobileSidebar({
 						theme={theme}
 						setTheme={setTheme}
 						className="w-full border-none"
+						isLoadingChats={isLoadingChats}
 					/>
 				</div>
 			</SheetContent>
--- a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
@ -3,6 +3,7 @@
 import { FolderOpen, PenSquare } from "lucide-react";
 import { useTranslations } from "next-intl";
 import { Button } from "@/components/ui/button";
+import { Skeleton } from "@/components/ui/skeleton";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import { cn } from "@/lib/utils";
 import type { ChatItem, NavItem, PageUsage, SearchSpace, User } from "../../types/layout.types";
@ -14,6 +15,15 @@ import { SidebarHeader } from "./SidebarHeader";
 import { SidebarSection } from "./SidebarSection";
 import { SidebarUserProfile } from "./SidebarUserProfile";

+/** Placeholder row (icon bar + title bar) rendered in the chat lists while threads load. */
+function ChatListItemSkeleton() {
+	const iconBar = <Skeleton className="h-4 w-4 shrink-0 rounded" />;
+	const titleBar = <Skeleton className="h-4 w-full max-w-[180px]" />;
+
+	return (
+		<div className="flex w-full items-center gap-2 rounded-md p-2">
+			{iconBar}
+			{titleBar}
+		</div>
+	);
+}
+
 interface SidebarProps {
 	searchSpace: SearchSpace | null;
 	isCollapsed?: boolean;
@ -39,6 +49,7 @@ interface SidebarProps {
 	theme?: string;
 	setTheme?: (theme: "light" | "dark" | "system") => void;
 	className?: string;
+	isLoadingChats?: boolean;
 }

 export function Sidebar({
@ -66,6 +77,7 @@ export function Sidebar({
 	theme,
 	setTheme,
 	className,
+	isLoadingChats = false,
 }: SidebarProps) {
 	const t = useTranslations("sidebar");

@ -153,7 +165,15 @@ export function Sidebar({
 							) : undefined
 						}
 					>
-						{sharedChats.length > 0 ? (
+						{isLoadingChats ? (
+							<div className="flex flex-col gap-0.5">
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+							</div>
+						) : sharedChats.length > 0 ? (
 							<div className="relative min-h-0 flex-1">
 								<div
 									className={`flex flex-col gap-0.5 max-h-full overflow-y-auto scrollbar-thin scrollbar-thumb-muted-foreground/20 scrollbar-track-transparent ${sharedChats.length > 4 ? "pb-8" : ""}`}
@ -206,7 +226,15 @@ export function Sidebar({
 							) : undefined
 						}
 					>
-						{chats.length > 0 ? (
+						{isLoadingChats ? (
+							<div className="flex flex-col gap-0.5">
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+							</div>
+						) : chats.length > 0 ? (
 							<div className="relative flex-1 min-h-0">
 								<div
 									className={`flex flex-col gap-0.5 h-full overflow-y-auto scrollbar-thin scrollbar-thumb-muted-foreground/20 scrollbar-track-transparent ${chats.length > 4 ? "pb-8" : ""}`}
--- a/surfsense_web/contracts/enums/connectorIcons.tsx
+++ b/surfsense_web/contracts/enums/connectorIcons.tsx
@ -92,7 +92,7 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
 		case "FILE":
 			return <File {...iconProps} />;
 		case "GOOGLE_DRIVE_FILE":
-			return <File {...iconProps} />;
+			return <Image src="/connectors/google-drive.svg" alt="Google Drive" {...imgProps} />;
 		case "COMPOSIO_GOOGLE_DRIVE_CONNECTOR":
 			return <Image src="/connectors/google-drive.svg" alt="Google Drive" {...imgProps} />;
 		case "COMPOSIO_GMAIL_CONNECTOR":
--- a/surfsense_web/contracts/types/document.types.ts
+++ b/surfsense_web/contracts/types/document.types.ts
@ -23,6 +23,7 @@ export const documentTypeEnum = z.enum([
 	"ELASTICSEARCH_CONNECTOR",
 	"BOOKSTACK_CONNECTOR",
 	"CIRCLEBACK",
+	"OBSIDIAN_CONNECTOR",
 	"SURFSENSE_DOCS",
 	"NOTE",
 	"COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
@ -41,6 +42,8 @@ export const document = z.object({
 	created_at: z.string(),
 	updated_at: z.string().nullable(),
 	search_space_id: z.number(),
+	created_by_id: z.string().nullable().optional(),
+	created_by_name: z.string().nullable().optional(),
 });

 export const extensionDocumentContent = z.object({
--- a/surfsense_web/hooks/use-documents-electric.ts
+++ b/surfsense_web/hooks/use-documents-electric.ts
@ -1,185 +0,0 @@
-"use client";
-
-import { useEffect, useMemo, useRef, useState } from "react";
-import type { SyncHandle } from "@/lib/electric/client";
-import { useElectricClient } from "@/lib/electric/context";
-
-interface Document {
-	id: number;
-	search_space_id: number;
-	document_type: string;
-	created_at: string;
-}
-
-/**
- * Hook for managing documents with Electric SQL real-time sync
- *
- * Uses the Electric client from context (provided by ElectricProvider)
- * instead of initializing its own - prevents race conditions and memory leaks
- */
-export function useDocumentsElectric(searchSpaceId: number | string | null) {
-	// Get Electric client from context - ElectricProvider handles initialization
-	const electricClient = useElectricClient();
-
-	const [documents, setDocuments] = useState<Document[]>([]);
-	const [loading, setLoading] = useState(true);
-	const [error, setError] = useState<Error | null>(null);
-	const syncHandleRef = useRef<SyncHandle | null>(null);
-	const liveQueryRef = useRef<{ unsubscribe: () => void } | null>(null);
-	const syncKeyRef = useRef<string | null>(null);
-
-	// Calculate document type counts from synced documents
-	const documentTypeCounts = useMemo(() => {
-		if (!documents.length) return {};
-
-		const counts: Record<string, number> = {};
-		for (const doc of documents) {
-			counts[doc.document_type] = (counts[doc.document_type] || 0) + 1;
-		}
-		return counts;
-	}, [documents]);
-
-	// Start syncing when Electric client is available
-	useEffect(() => {
-		// Wait for both searchSpaceId and Electric client to be available
-		if (!searchSpaceId || !electricClient) {
-			setLoading(!electricClient); // Still loading if waiting for Electric
-			if (!searchSpaceId) {
-				setDocuments([]);
-			}
-			return;
-		}
-
-		// Create a unique key for this sync to prevent duplicate subscriptions
-		const syncKey = `documents_${searchSpaceId}`;
-		if (syncKeyRef.current === syncKey) {
-			// Already syncing for this search space
-			return;
-		}
-
-		let mounted = true;
-		syncKeyRef.current = syncKey;
-
-		async function startSync() {
-			try {
-				console.log("[useDocumentsElectric] Starting sync for search space:", searchSpaceId);
-
-				const handle = await electricClient.syncShape({
-					table: "documents",
-					where: `search_space_id = ${searchSpaceId}`,
-					columns: ["id", "document_type", "search_space_id", "created_at"],
-					primaryKey: ["id"],
-				});
-
-				console.log("[useDocumentsElectric] Sync started:", {
-					isUpToDate: handle.isUpToDate,
-				});
-
-				// Wait for initial sync with timeout
-				if (!handle.isUpToDate && handle.initialSyncPromise) {
-					try {
-						await Promise.race([
-							handle.initialSyncPromise,
-							new Promise((resolve) => setTimeout(resolve, 2000)),
-						]);
-					} catch (syncErr) {
-						console.error("[useDocumentsElectric] Initial sync failed:", syncErr);
-					}
-				}
-
-				if (!mounted) {
-					handle.unsubscribe();
-					return;
-				}
-
-				syncHandleRef.current = handle;
-				setLoading(false);
-				setError(null);
-
-				// Fetch initial documents
-				await fetchDocuments();
-
-				// Set up live query for real-time updates
-				await setupLiveQuery();
-			} catch (err) {
-				if (!mounted) return;
-				console.error("[useDocumentsElectric] Failed to start sync:", err);
-				setError(err instanceof Error ? err : new Error("Failed to sync documents"));
-				setLoading(false);
-			}
-		}
-
-		async function fetchDocuments() {
-			try {
-				const result = await electricClient.db.query<Document>(
-					`SELECT id, document_type, search_space_id, created_at FROM documents WHERE search_space_id = $1 ORDER BY created_at DESC`,
-					[searchSpaceId]
-				);
-				if (mounted) {
-					setDocuments(result.rows || []);
-				}
-			} catch (err) {
-				console.error("[useDocumentsElectric] Failed to fetch:", err);
-			}
-		}
-
-		async function setupLiveQuery() {
-			try {
-				// eslint-disable-next-line @typescript-eslint/no-explicit-any
-				const db = electricClient.db as any;
-
-				if (db.live?.query && typeof db.live.query === "function") {
-					const liveQuery = await db.live.query(
-						`SELECT id, document_type, search_space_id, created_at FROM documents WHERE search_space_id = $1 ORDER BY created_at DESC`,
-						[searchSpaceId]
-					);
-
-					if (!mounted) {
-						liveQuery.unsubscribe?.();
-						return;
-					}
-
-					// Set initial results
-					if (liveQuery.initialResults?.rows) {
-						setDocuments(liveQuery.initialResults.rows);
-					} else if (liveQuery.rows) {
-						setDocuments(liveQuery.rows);
-					}
-
-					// Subscribe to changes
-					if (typeof liveQuery.subscribe === "function") {
-						liveQuery.subscribe((result: { rows: Document[] }) => {
-							if (mounted && result.rows) {
-								setDocuments(result.rows);
-							}
-						});
-					}
-
-					if (typeof liveQuery.unsubscribe === "function") {
-						liveQueryRef.current = liveQuery;
-					}
-				}
-			} catch (liveErr) {
-				console.error("[useDocumentsElectric] Failed to set up live query:", liveErr);
-			}
-		}
-
-		startSync();
-
-		return () => {
-			mounted = false;
-			syncKeyRef.current = null;
-
-			if (syncHandleRef.current) {
-				syncHandleRef.current.unsubscribe();
-				syncHandleRef.current = null;
-			}
-			if (liveQueryRef.current) {
-				liveQueryRef.current.unsubscribe();
-				liveQueryRef.current = null;
-			}
-		};
-	}, [searchSpaceId, electricClient]);
-
-	return { documentTypeCounts, loading, error };
-}
--- a/surfsense_web/hooks/use-documents.ts
+++ b/surfsense_web/hooks/use-documents.ts
@ -0,0 +1,449 @@
+"use client";
+
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+import type { DocumentTypeEnum } from "@/contracts/types/document.types";
+import { documentsApiService } from "@/lib/apis/documents-api.service";
+import type { SyncHandle } from "@/lib/electric/client";
+import { useElectricClient } from "@/lib/electric/context";
+
+// Stable empty array to prevent infinite re-renders when no typeFilter is provided
+const EMPTY_TYPE_FILTER: DocumentTypeEnum[] = [];
+
+// Document status type (matches backend DocumentStatus JSONB)
+export interface DocumentStatusType {
+	// Lifecycle state of the document; exactly one of the four literals below.
+	state: "ready" | "pending" | "processing" | "failed";
+	// Optional free-text explanation — presumably populated for "failed"; confirm against backend.
+	reason?: string;
+}
+
+// Document from Electric sync (lightweight table columns - NO content/metadata).
+// The fields mirror the column list requested from syncShape in useDocuments.
+interface DocumentElectric {
+	id: number;
+	search_space_id: number;
+	document_type: string;
+	title: string;
+	// Creator's user id; turned into a display name through the hook's user cache.
+	created_by_id: string | null;
+	created_at: string;
+	// May be null on a synced row; the display conversion falls back to { state: "ready" }.
+	status: DocumentStatusType | null;
+}
+
+// Document for display (with resolved user name).
+// Produced from either an API item or an Electric row by the converters in useDocuments.
+export interface DocumentDisplay {
+	id: number;
+	search_space_id: number;
+	document_type: string;
+	title: string;
+	created_by_id: string | null;
+	// Creator display name; null when unknown (not supplied by the API or not in the user cache).
+	created_by_name: string | null;
+	created_at: string;
+	// Never null here: the converters default a missing status to { state: "ready" }.
+	status: DocumentStatusType;
+}
+
+/**
+ * Drop repeated ids (a later `created_at` wins, ties keep the first seen) and sort newest-first.
+ */
+function deduplicateAndSort<T extends { id: number; created_at: string }>(items: T[]): T[] {
+	const toMillis = (entry: T): number => new Date(entry.created_at).getTime();
+	const latestById = new Map<number, T>();
+	for (const candidate of items) {
+		const current = latestById.get(candidate.id);
+		// Strict ">" so an equal (or unparsable) timestamp never displaces the stored entry
+		if (current === undefined || toMillis(candidate) > toMillis(current)) {
+			latestById.set(candidate.id, candidate);
+		}
+	}
+	const byNewestFirst = (left: T, right: T): number => toMillis(right) - toMillis(left);
+	return Array.from(latestById.values()).sort(byNewestFirst);
+}
+
+/**
+ * Report whether a synced row carries both an id and a non-empty title
+ */
+function isValidDocument(doc: DocumentElectric): boolean {
+	return !(doc.id == null || doc.title == null || doc.title === "");
+}
+
+/**
+ * Real-time documents hook with Electric SQL
+ *
+ * Architecture (100% Reliable):
+ * 1. API is the PRIMARY source of truth - always loads first
+ * 2. Electric provides REAL-TIME updates for additions and deletions
+ * 3. Use syncHandle.isUpToDate to determine if deletions can be trusted
+ * 4. Handles bulk deletions correctly by checking sync state
+ *
+ * @param searchSpaceId - The search space ID to filter documents
+ * @param typeFilter - Optional document types to filter by
+ */
+export function useDocuments(
+	searchSpaceId: number | null,
+	typeFilter: DocumentTypeEnum[] = EMPTY_TYPE_FILTER
+) {
+	const electricClient = useElectricClient();
+
+	const [documents, setDocuments] = useState<DocumentDisplay[]>([]);
+	const [loading, setLoading] = useState(true);
+	const [error, setError] = useState<Error | null>(null);
+
+	// Track if initial API load is complete (source of truth)
+	const apiLoadedRef = useRef(false);
+
+	// User cache: userId → displayName
+	const userCacheRef = useRef<Map<string, string>>(new Map());
+
+	// Electric sync refs
+	const syncHandleRef = useRef<SyncHandle | null>(null);
+	const liveQueryRef = useRef<{ unsubscribe?: () => void } | null>(null);
+
+	// Real-time type counts
+	const typeCounts = useMemo(() => {
+		const counts: Record<string, number> = {};
+		for (const doc of documents) {
+			counts[doc.document_type] = (counts[doc.document_type] || 0) + 1;
+		}
+		return counts;
+	}, [documents]);
+
+	// Populate user cache from API response
+	const populateUserCache = useCallback(
+		(items: Array<{ created_by_id?: string | null; created_by_name?: string | null }>) => {
+			for (const item of items) {
+				if (item.created_by_id && item.created_by_name) {
+					userCacheRef.current.set(item.created_by_id, item.created_by_name);
+				}
+			}
+		},
+		[]
+	);
+
+	// Convert API item to display doc
+	const apiToDisplayDoc = useCallback(
+		(item: {
+			id: number;
+			search_space_id: number;
+			document_type: string;
+			title: string;
+			created_by_id?: string | null;
+			created_by_name?: string | null;
+			created_at: string;
+			status?: DocumentStatusType | null;
+		}): DocumentDisplay => ({
+			id: item.id,
+			search_space_id: item.search_space_id,
+			document_type: item.document_type,
+			title: item.title,
+			created_by_id: item.created_by_id ?? null,
+			created_by_name: item.created_by_name ?? null,
+			created_at: item.created_at,
+			status: item.status ?? { state: "ready" },
+		}),
+		[]
+	);
+
+	// Convert Electric doc to display doc
+	const electricToDisplayDoc = useCallback(
+		(doc: DocumentElectric): DocumentDisplay => ({
+			...doc,
+			created_by_name: doc.created_by_id
+				? (userCacheRef.current.get(doc.created_by_id) ?? null)
+				: null,
+			status: doc.status ?? { state: "ready" },
+		}),
+		[]
+	);
+
+	// EFFECT 1: Load from API (PRIMARY source of truth)
+	// Re-runs whenever the search space, the type filter, or the memoized helpers change.
+	useEffect(() => {
+		// Nothing to load without a search space; just make sure the spinner is off.
+		if (!searchSpaceId) {
+			setLoading(false);
+			return;
+		}
+
+		// Snapshot the inputs so the async work below uses the values of this run.
+		const activeSpaceId = searchSpaceId;
+		const activeTypes = typeFilter;
+
+		// Flipped by the cleanup; guards every state update after an await.
+		let cancelled = false;
+		// Live updates are ignored until this flag is set again by a successful load.
+		apiLoadedRef.current = false;
+
+		const fetchAllDocuments = async () => {
+			try {
+				setLoading(true);
+				console.log("[useDocuments] Loading from API (source of truth):", activeSpaceId);
+
+				const response = await documentsApiService.getDocuments({
+					queryParams: {
+						search_space_id: activeSpaceId,
+						page: 0,
+						page_size: -1, // Fetch all documents
+						...(activeTypes.length > 0 && { document_types: activeTypes }),
+					},
+				});
+
+				if (cancelled) return;
+
+				// Cache creator names first so later Electric rows can be labelled.
+				populateUserCache(response.items);
+				const loadedDocs = response.items.map(apiToDisplayDoc);
+				setDocuments(loadedDocs);
+				apiLoadedRef.current = true;
+				setError(null);
+				console.log("[useDocuments] API loaded", loadedDocs.length, "documents");
+			} catch (err) {
+				if (cancelled) return;
+				console.error("[useDocuments] API load failed:", err);
+				const failure = err instanceof Error ? err : new Error("Failed to load documents");
+				setError(failure);
+			} finally {
+				// Only the still-active run may clear the loading flag.
+				if (!cancelled) setLoading(false);
+			}
+		};
+
+		fetchAllDocuments();
+
+		return () => {
+			cancelled = true;
+		};
+	}, [searchSpaceId, typeFilter, populateUserCache, apiToDisplayDoc]);
+
+	// EFFECT 2: Start Electric sync + live query for real-time updates
+	// Flow: start a shape sync for the search space's documents, wait (at most 5s) for the
+	// initial sync, then subscribe to a live query and merge each result into `documents`.
+	// Live results are ignored until EFFECT 1 has set apiLoadedRef.
+	useEffect(() => {
+		if (!searchSpaceId || !electricClient) return;
+
+		// Capture validated values for async closure
+		const spaceId = searchSpaceId;
+		const client = electricClient;
+		const currentTypeFilter = typeFilter;
+
+		// Cleared by the cleanup function; checked after every await and inside callbacks.
+		let mounted = true;
+
+		async function setupElectricRealtime() {
+			// Cleanup previous subscriptions
+			if (syncHandleRef.current) {
+				syncHandleRef.current.unsubscribe();
+				syncHandleRef.current = null;
+			}
+			if (liveQueryRef.current) {
+				liveQueryRef.current.unsubscribe?.();
+				liveQueryRef.current = null;
+			}
+
+			try {
+				console.log("[useDocuments] Starting Electric sync for real-time updates");
+
+				// Start Electric sync
+				const handle = await client.syncShape({
+					table: "documents",
+					where: `search_space_id = ${spaceId}`,
+					columns: [
+						"id",
+						"document_type",
+						"search_space_id",
+						"title",
+						"created_by_id",
+						"created_at",
+						"status",
+					],
+					primaryKey: ["id"],
+				});
+
+				// The effect was torn down while the sync was starting: release the handle.
+				if (!mounted) {
+					handle.unsubscribe();
+					return;
+				}
+
+				syncHandleRef.current = handle;
+				console.log("[useDocuments] Sync started, isUpToDate:", handle.isUpToDate);
+
+				// Wait for initial sync (with timeout)
+				if (!handle.isUpToDate && handle.initialSyncPromise) {
+					let timeoutId: ReturnType<typeof setTimeout> | undefined;
+					try {
+						await Promise.race([
+							handle.initialSyncPromise,
+							new Promise((resolve) => {
+								timeoutId = setTimeout(resolve, 5000);
+							}),
+						]);
+					} finally {
+						// Do not leave the fallback timer pending once the race has settled.
+						clearTimeout(timeoutId);
+					}
+					console.log("[useDocuments] Initial sync complete, isUpToDate:", handle.isUpToDate);
+				}
+
+				if (!mounted) return;
+
+				// Set up live query
+				const db = client.db as {
+					live?: {
+						query: <T>(
+							sql: string,
+							params?: (number | string)[]
+						) => Promise<{
+							subscribe: (cb: (result: { rows: T[] }) => void) => void;
+							unsubscribe?: () => void;
+						}>;
+					};
+				};
+
+				if (!db.live?.query) {
+					console.warn("[useDocuments] Live queries not available");
+					return;
+				}
+
+				let query = `SELECT id, document_type, search_space_id, title, created_by_id, created_at, status
+					FROM documents 
+					WHERE search_space_id = $1`;
+
+				const params: (number | string)[] = [spaceId];
+
+				// Optional type filter: $1 is the search space, so placeholders start at $2.
+				if (currentTypeFilter.length > 0) {
+					const placeholders = currentTypeFilter.map((_, i) => `$${i + 2}`).join(", ");
+					query += ` AND document_type IN (${placeholders})`;
+					params.push(...currentTypeFilter);
+				}
+
+				query += ` ORDER BY created_at DESC`;
+
+				const liveQuery = await db.live.query<DocumentElectric>(query, params);
+
+				if (!mounted) {
+					liveQuery.unsubscribe?.();
+					return;
+				}
+
+				console.log("[useDocuments] Live query subscribed");
+
+				liveQuery.subscribe((result: { rows: DocumentElectric[] }) => {
+					if (!mounted || !result.rows) return;
+
+					// DEBUG: Log first few raw documents to see what's coming from Electric
+					console.log("[useDocuments] Raw data sample:", result.rows.slice(0, 3));
+
+					const validItems = result.rows.filter(isValidDocument);
+					const isFullySynced = syncHandleRef.current?.isUpToDate ?? false;
+
+					console.log(
+						`[useDocuments] Live update: ${result.rows.length} raw, ${validItems.length} valid, synced: ${isFullySynced}`
+					);
+
+					// Fetch user names for new users (non-blocking)
+					const unknownUserIds = validItems
+						.filter(
+							(doc): doc is DocumentElectric & { created_by_id: string } =>
+								doc.created_by_id !== null && !userCacheRef.current.has(doc.created_by_id)
+						)
+						.map((doc) => doc.created_by_id);
+
+					// NOTE(review): this request only covers the first page of 20 documents, so a
+					// creator who does not appear there stays unknown and the request repeats on
+					// every live update — confirm whether that is acceptable.
+					if (unknownUserIds.length > 0) {
+						documentsApiService
+							.getDocuments({
+								queryParams: { search_space_id: spaceId, page: 0, page_size: 20 },
+							})
+							.then((response) => {
+								populateUserCache(response.items);
+								if (mounted) {
+									// Re-label the current documents from the refreshed cache.
+									setDocuments((prev) =>
+										prev.map((doc) => ({
+											...doc,
+											created_by_name: doc.created_by_id
+												? (userCacheRef.current.get(doc.created_by_id) ?? null)
+												: null,
+										}))
+									);
+								}
+							})
+							.catch(() => {});
+					}
+
+					// Smart update logic based on sync state
+					setDocuments((prev) => {
+						// Don't process if API hasn't loaded yet
+						if (!apiLoadedRef.current) {
+							console.log("[useDocuments] Waiting for API load, skipping live update");
+							return prev;
+						}
+
+						// Case 1: Live query is empty
+						if (validItems.length === 0) {
+							if (isFullySynced && prev.length > 0) {
+								// Electric is fully synced and says 0 items - trust it (all deleted)
+								console.log("[useDocuments] All documents deleted (Electric synced)");
+								return [];
+							}
+							// Partial sync or error - keep existing
+							console.log("[useDocuments] Empty live result, keeping existing");
+							return prev;
+						}
+
+						// Case 2: Electric is fully synced - TRUST IT COMPLETELY (handles bulk deletes)
+						if (isFullySynced) {
+							const liveDocs = deduplicateAndSort(validItems.map(electricToDisplayDoc));
+							console.log(
+								`[useDocuments] Synced update: ${liveDocs.length} docs (was ${prev.length})`
+							);
+							return liveDocs;
+						}
+
+						// Case 3: Partial sync - only ADD new items, don't remove any
+						const existingIds = new Set(prev.map((d) => d.id));
+
+						// Index live rows by id once so the merge below is a constant-time lookup
+						// per document instead of a linear scan. The first row for an id wins,
+						// which is the row Array.prototype.find would have returned.
+						const liveById = new Map<DocumentElectric["id"], DocumentElectric>();
+						for (const item of validItems) {
+							if (!liveById.has(item.id)) liveById.set(item.id, item);
+						}
+
+						// Find new items (in live but not in prev)
+						const newItems = validItems
+							.filter((item) => !existingIds.has(item.id))
+							.map(electricToDisplayDoc);
+
+						// Find updated items (in both, update with latest data)
+						const updatedPrev = prev.map((doc) => {
+							const liveItem = liveById.get(doc.id);
+							return liveItem ? electricToDisplayDoc(liveItem) : doc;
+						});
+
+						if (newItems.length > 0) {
+							console.log(`[useDocuments] Adding ${newItems.length} new items (partial sync)`);
+							return deduplicateAndSort([...newItems, ...updatedPrev]);
+						}
+
+						return updatedPrev;
+					});
+				});
+
+				liveQueryRef.current = liveQuery;
+			} catch (err) {
+				console.error("[useDocuments] Electric setup failed:", err);
+				// Don't set error - API data is already loaded
+			}
+		}
+
+		setupElectricRealtime();
+
+		// Teardown: stop callbacks from touching state and release both subscriptions.
+		return () => {
+			mounted = false;
+			if (syncHandleRef.current) {
+				syncHandleRef.current.unsubscribe();
+				syncHandleRef.current = null;
+			}
+			if (liveQueryRef.current) {
+				liveQueryRef.current.unsubscribe?.();
+				liveQueryRef.current = null;
+			}
+		};
+	}, [searchSpaceId, electricClient, typeFilter, electricToDisplayDoc, populateUserCache]);
+
+	// Remembers the searchSpaceId seen by the previous run of the effect below;
+	// null means the effect has not recorded one yet.
+	const prevSearchSpaceIdRef = useRef<number | null>(null);
+
+	// Clear documents, the API-loaded flag and the user cache when the search space
+	// actually changes. Skipped while no previous id has been recorded (initial mount).
+	useEffect(() => {
+		const previousId = prevSearchSpaceIdRef.current;
+		const spaceChanged = previousId !== null && previousId !== searchSpaceId;
+
+		if (spaceChanged) {
+			setDocuments([]);
+			apiLoadedRef.current = false;
+			userCacheRef.current.clear();
+		}
+
+		prevSearchSpaceIdRef.current = searchSpaceId;
+	}, [searchSpaceId]);
+
+	return {
+		documents,
+		typeCounts,
+		total: documents.length,
+		loading,
+		error,
+	};
+}
--- a/surfsense_web/hooks/use-inbox.ts
+++ b/surfsense_web/hooks/use-inbox.ts
@ -38,10 +38,14 @@ function deduplicateAndSort(items: InboxItem[]): InboxItem[] {

 /**
 * Calculate the cutoff date for sync window
+ * IMPORTANT: Rounds to the start of the day (midnight UTC) to ensure stable values
+ * across re-renders. Without this, millisecond differences cause multiple syncs!
 */
 function getSyncCutoffDate(): string {
 	const cutoff = new Date();
+	// Step back SYNC_WINDOW_DAYS days (setDate/getDate work on the local calendar date).
 	cutoff.setDate(cutoff.getDate() - SYNC_WINDOW_DAYS);
+	// NOTE(review): the subtraction above is local-time while the rounding below is UTC —
+	// confirm the mix is intended (setUTCDate/getUTCDate would keep everything in UTC).
+	// Round to start of day to prevent millisecond differences causing duplicate syncs
+	cutoff.setUTCHours(0, 0, 0, 0);
+	// ISO-8601 string in UTC.
 	return cutoff.toISOString();
 }

--- a/surfsense_web/lib/electric/client.ts
+++ b/surfsense_web/lib/electric/client.ts
@ -12,10 +12,21 @@
 * 3. Works even if logout cleanup fails
 */

-import { PGlite } from "@electric-sql/pglite";
+import { PGlite, type Transaction } from "@electric-sql/pglite";
 import { live } from "@electric-sql/pglite/live";
 import { electricSync } from "@electric-sql/pglite-sync";

+// Debug logging - only logs in development, silent in production
+const IS_DEV = process.env.NODE_ENV === "development";
+
+function debugLog(...args: unknown[]) {
+	// Forwards to console.log only when IS_DEV is true; otherwise does nothing.
+	if (!IS_DEV) return;
+	console.log(...args);
+}
+
+function debugWarn(...args: unknown[]) {
+	// Forwards to console.warn only when IS_DEV is true; otherwise does nothing.
+	if (!IS_DEV) return;
+	console.warn(...args);
+}
+
 // Types
 export interface ElectricClient {
 	db: PGlite;
@ -56,7 +67,14 @@ const pendingSyncs = new Map<string, Promise<SyncHandle>>();
 // v2: user-specific database architecture
 // v3: consistent cutoff date for sync+queries, visibility refresh support
 // v4: heartbeat-based stale notification detection with updated_at tracking
-const SYNC_VERSION = 4;
+// v5: fixed duplicate key errors (root cause: unstable cutoff dates in use-inbox.ts)
+//     - added onMustRefetch handler for server-side refetch scenarios
+//     - fixed getSyncCutoffDate to use stable midnight UTC timestamps
+// v6: real-time documents table - added title and created_by_id columns for live document display
+// v7: removed use-documents-electric.ts - consolidated to single documents sync to prevent conflicts
+// v8: added status column for real-time document processing status (ready/processing/failed)
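+// v9: added pending state for accurate document queue visibility
+// NOTE(review): SYNC_VERSION below is 11, but v10 and v11 are not described in this
+//     history - add entries explaining those bumps.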
+const SYNC_VERSION = 11;

 // Database name prefix for identifying SurfSense databases
 const DB_PREFIX = "surfsense-";
@ -77,7 +95,7 @@ function getDbName(userId: string): string {
 }

 /**
- * Clean up databases from OTHER users (not the current user)
+ * Clean up databases from OTHER users AND old versions
 * This is called on login to ensure clean state
 */
 async function cleanupOtherUserDatabases(currentUserId: string): Promise<void> {
@ -85,6 +103,10 @@ async function cleanupOtherUserDatabases(currentUserId: string): Promise<void> {
 		return;
 	}

+	// The exact database identifier we want to keep (current user + current version)
+	// Format: "surfsense-{userId}-v{version}"
+	const currentDbIdentifier = `${DB_PREFIX}${currentUserId}-v${SYNC_VERSION}`;
+
 	try {
 		// Try to list all databases (not supported in all browsers)
 		if (typeof window.indexedDB.databases === "function") {
@ -95,26 +117,27 @@ async function cleanupOtherUserDatabases(currentUserId: string): Promise<void> {
 				if (!dbName) continue;

 				// Check if this is a SurfSense database
-				if (dbName.startsWith(DB_PREFIX) || dbName.includes("surfsense")) {
-					// Don't delete current user's database
-					if (dbName.includes(currentUserId)) {
-						console.log(`[Electric] Keeping current user's database: ${dbName}`);
+				if (dbName.includes("surfsense")) {
+					// Check if this is the current database
+					// PGlite stores with "/pglite/" prefix, so we check if the name ENDS WITH our identifier
+					if (dbName.endsWith(currentDbIdentifier)) {
+						debugLog(`[Electric] Keeping current database: ${dbName}`);
 						continue;
 					}

-					// Delete databases from other users
+					// Delete ALL other databases (other users OR old versions of current user)
 					try {
-						console.log(`[Electric] Deleting stale database: ${dbName}`);
+						debugLog(`[Electric] Deleting stale database: ${dbName}`);
 						window.indexedDB.deleteDatabase(dbName);
 					} catch (deleteErr) {
-						console.warn(`[Electric] Failed to delete database ${dbName}:`, deleteErr);
+						debugWarn(`[Electric] Failed to delete database ${dbName}:`, deleteErr);
 					}
 				}
 			}
 		}
 	} catch (err) {
 		// indexedDB.databases() not supported - that's okay, login cleanup is best-effort
-		console.warn("[Electric] Could not enumerate databases for cleanup:", err);
+		debugWarn("[Electric] Could not enumerate databases for cleanup:", err);
 	}
 }

@ -140,7 +163,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {

 	// If initialized for a different user, close the old client first
 	if (electricClient && currentUserId !== userId) {
-		console.log(`[Electric] User changed from ${currentUserId} to ${userId}, reinitializing...`);
+		debugLog(`[Electric] User changed from ${currentUserId} to ${userId}, reinitializing...`);
 		await cleanupElectric();
 	}

@ -155,12 +178,12 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 	initPromise = (async () => {
 		try {
 			// STEP 1: Clean up databases from other users (login-time cleanup)
-			console.log("[Electric] Cleaning up databases from other users...");
+			debugLog("[Electric] Cleaning up databases from other users...");
 			await cleanupOtherUserDatabases(userId);

 			// STEP 2: Create user-specific PGlite database
 			const dbName = getDbName(userId);
-			console.log(`[Electric] Initializing database: ${dbName}`);
+			debugLog(`[Electric] Initializing database: ${dbName}`);

 			const db = await PGlite.create({
 				dataDir: dbName,
@ -216,18 +239,22 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 			`);

 			// Create the documents table schema in PGlite
-			// Only sync minimal fields needed for type counts: id, document_type, search_space_id
+			// Sync columns needed for real-time table display (lightweight - no content/metadata)
 			await db.exec(`
 				CREATE TABLE IF NOT EXISTS documents (
 					id INTEGER PRIMARY KEY,
 					search_space_id INTEGER NOT NULL,
 					document_type TEXT NOT NULL,
-					created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+					title TEXT NOT NULL DEFAULT '',
+					created_by_id TEXT,
+					created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+					status JSONB DEFAULT '{"state": "ready"}'::jsonb
 				);
 				
 				CREATE INDEX IF NOT EXISTS idx_documents_search_space_id ON documents(search_space_id);
 				CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(document_type);
 				CREATE INDEX IF NOT EXISTS idx_documents_search_space_type ON documents(search_space_id, document_type);
+				CREATE INDEX IF NOT EXISTS idx_documents_status ON documents((status->>'state'));
 			`);

 			await db.exec(`
@ -290,14 +317,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 					// Check if we already have an active sync for this shape (memory optimization)
 					const existingHandle = activeSyncHandles.get(cacheKey);
 					if (existingHandle) {
-						console.log(`[Electric] Reusing existing sync handle for: ${cacheKey}`);
+						debugLog(`[Electric] Reusing existing sync handle for: ${cacheKey}`);
 						return existingHandle;
 					}

 					// Check if there's already a pending sync for this shape (prevent race condition)
 					const pendingSync = pendingSyncs.get(cacheKey);
 					if (pendingSync) {
-						console.log(`[Electric] Waiting for pending sync to complete: ${cacheKey}`);
+						debugLog(`[Electric] Waiting for pending sync to complete: ${cacheKey}`);
 						return pendingSync;
 					}

@ -323,7 +350,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {

 								if (singleQuoteCount % 2 !== 0) {
 									// Odd number of quotes means unterminated string literal
-									console.warn("Where clause has unmatched quotes, fixing:", where);
+									debugWarn("Where clause has unmatched quotes, fixing:", where);
 									// Add closing quote at the end
 									validatedWhere = `${where}'`;
 									params.where = validatedWhere;
@ -337,15 +364,15 @@ export async function initElectric(userId: string): Promise<ElectricClient> {

 						if (columns) params.columns = columns.join(",");

-						console.log("[Electric] Syncing shape with params:", params);
-						console.log("[Electric] Electric URL:", `${electricUrl}/v1/shape`);
-						console.log("[Electric] Where clause:", where, "Validated:", validatedWhere);
+						debugLog("[Electric] Syncing shape with params:", params);
+						debugLog("[Electric] Electric URL:", `${electricUrl}/v1/shape`);
+						debugLog("[Electric] Where clause:", where, "Validated:", validatedWhere);

 						try {
 							// Debug: Test Electric SQL connection directly first (DEV ONLY - skipped in production)
 							if (process.env.NODE_ENV === "development") {
 								const testUrl = `${electricUrl}/v1/shape?table=${table}&offset=-1${validatedWhere ? `&where=${encodeURIComponent(validatedWhere)}` : ""}`;
-								console.log("[Electric] Testing Electric SQL directly:", testUrl);
+								debugLog("[Electric] Testing Electric SQL directly:", testUrl);
 								try {
 									const testResponse = await fetch(testUrl);
 									const testHeaders = {
@ -353,9 +380,9 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 										offset: testResponse.headers.get("electric-offset"),
 										upToDate: testResponse.headers.get("electric-up-to-date"),
 									};
-									console.log("[Electric] Direct Electric SQL response headers:", testHeaders);
+									debugLog("[Electric] Direct Electric SQL response headers:", testHeaders);
 									const testData = await testResponse.json();
-									console.log(
+									debugLog(
 										"[Electric] Direct Electric SQL data count:",
 										Array.isArray(testData) ? testData.length : "not array",
 										testData
@ -396,14 +423,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								// Shorter timeout (5 seconds) as fallback
 								setTimeout(() => {
 									if (!syncResolved) {
-										console.warn(
+										debugWarn(
 											`[Electric] ⚠️ Sync timeout for ${table} - checking isUpToDate one more time...`
 										);
 										// Check isUpToDate one more time before resolving
 										// This will be checked after shape is created
 										setTimeout(() => {
 											if (!syncResolved) {
-												console.warn(
+												debugWarn(
 													`[Electric] ⚠️ Sync timeout for ${table} - resolving anyway after 5s`
 												);
 												resolveInitialSync();
@ -413,7 +440,22 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								}, 5000);
 							});

-							// Include userId in shapeKey for user-specific sync state
+							// ROOT CAUSE FIX: The duplicate key errors were caused by unstable cutoff dates
+							// in use-inbox.ts generating different sync keys on each render.
+							// That's now fixed (rounded to midnight UTC in getSyncCutoffDate).
+							// We can safely use shapeKey for fast incremental sync.
+
+							const shapeKey = `${userId}_v${SYNC_VERSION}_${table}_${where?.replace(/[^a-zA-Z0-9]/g, "_") || "all"}`;
+
+							// Type assertion to PGlite with electric extension
+							const pgWithElectric = db as unknown as {
+								electric: {
+									syncShapeToTable: (
+										config: Record<string, unknown>
+									) => Promise<{ unsubscribe: () => void; isUpToDate: boolean; stream: unknown }>;
+								};
+							};
+
 							const shapeConfig = {
 								shape: {
 									url: `${electricUrl}/v1/shape`,
@ -425,9 +467,9 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								},
 								table,
 								primaryKey,
-								shapeKey: `${userId}_v${SYNC_VERSION}_${table}_${where?.replace(/[^a-zA-Z0-9]/g, "_") || "all"}`, // User-specific versioned key
+								shapeKey, // Re-enabled for fast incremental sync (root cause in use-inbox.ts is fixed)
 								onInitialSync: () => {
-									console.log(
+									debugLog(
 										`[Electric] ✅ Initial sync complete for ${table} - data should now be in PGlite`
 									);
 									resolveInitialSync();
@ -440,21 +482,37 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 									);
 									rejectInitialSync(error);
 								},
+								// Must-refetch handler: wipe the locally stored rows for this shape inside
+								// the provided transaction so the re-sync starts from an empty slate
+								// (intended to avoid "duplicate key" errors when the shape is invalidated).
+								onMustRefetch: async (tx: Transaction) => {
+									debugLog(
+										`[Electric] ⚠️ Must refetch triggered for ${table} - clearing existing data`
+									);
+
+									// The shape's WHERE clause is interpolated as-is (it is not parsed);
+									// without a clause the whole table is cleared.
+									const deleteSql = validatedWhere
+										? `DELETE FROM ${table} WHERE ${validatedWhere}`
+										: `DELETE FROM ${table}`;
+									const clearedMessage = validatedWhere
+										? `[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`
+										: `[Electric] 🗑️ Cleared all rows from ${table}`;
+
+									try {
+										await tx.exec(deleteSql);
+										debugLog(clearedMessage);
+									} catch (cleanupError) {
+										console.error(
+											`[Electric] ❌ Failed to clear ${table} during must-refetch:`,
+											cleanupError
+										);
+										// Re-throw to let Electric handle the error
+										throw cleanupError;
+									}
+								},
 							};

-							console.log(
-								"[Electric] syncShapeToTable config:",
-								JSON.stringify(shapeConfig, null, 2)
-							);
-
-							// Type assertion to PGlite with electric extension
-							const pgWithElectric = db as PGlite & {
-								electric: {
-									syncShapeToTable: (
-										config: typeof shapeConfig
-									) => Promise<{ unsubscribe: () => void; isUpToDate: boolean; stream: unknown }>;
-								};
-							};
+							debugLog("[Electric] syncShapeToTable config:", JSON.stringify(shapeConfig, null, 2));

 							let shape: { unsubscribe: () => void; isUpToDate: boolean; stream: unknown };
 							try {
@ -464,7 +522,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								const errorMessage =
 									syncError instanceof Error ? syncError.message : String(syncError);
 								if (errorMessage.includes("Already syncing")) {
-									console.warn(
+									debugWarn(
 										`[Electric] Already syncing ${table}, waiting for existing sync to settle...`
 									);

@ -474,12 +532,12 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 									// Check if an active handle now exists (another sync might have completed)
 									const existingHandle = activeSyncHandles.get(cacheKey);
 									if (existingHandle) {
-										console.log(`[Electric] Found existing handle after waiting: ${cacheKey}`);
+										debugLog(`[Electric] Found existing handle after waiting: ${cacheKey}`);
 										return existingHandle;
 									}

 									// Retry once after waiting
-									console.log(`[Electric] Retrying sync for ${table}...`);
+									debugLog(`[Electric] Retrying sync for ${table}...`);
 									try {
 										shape = await pgWithElectric.electric.syncShapeToTable(shapeConfig);
 									} catch (retryError) {
@ -487,12 +545,10 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 											retryError instanceof Error ? retryError.message : String(retryError);
 										if (retryMessage.includes("Already syncing")) {
 											// Still syncing - create a placeholder handle that indicates the table is being synced
-											console.warn(
-												`[Electric] ${table} still syncing, creating placeholder handle`
-											);
+											debugWarn(`[Electric] ${table} still syncing, creating placeholder handle`);
 											const placeholderHandle: SyncHandle = {
 												unsubscribe: () => {
-													console.log(`[Electric] Placeholder unsubscribe for: ${cacheKey}`);
+													debugLog(`[Electric] Placeholder unsubscribe for: ${cacheKey}`);
 													activeSyncHandles.delete(cacheKey);
 												},
 												get isUpToDate() {
@ -516,7 +572,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 							}

 							// Log the actual shape result structure
-							console.log("[Electric] Shape sync result (initial):", {
+							debugLog("[Electric] Shape sync result (initial):", {
 								hasUnsubscribe: typeof shape?.unsubscribe === "function",
 								isUpToDate: shape?.isUpToDate,
 								hasStream: !!shape?.stream,
@ -525,7 +581,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {

 							// Recommended Approach Step 1: Check isUpToDate immediately
 							if (shape.isUpToDate) {
-								console.log(
+								debugLog(
 									`[Electric] ✅ Sync already up-to-date for ${table} (resuming from previous state)`
 								);
 								resolveInitialSync();
@ -533,7 +589,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								// Recommended Approach Step 2: Subscribe to stream and watch for "up-to-date" message
 								if (shape?.stream) {
 									const stream = shape.stream as any;
-									console.log("[Electric] Shape stream details:", {
+									debugLog("[Electric] Shape stream details:", {
 										shapeHandle: stream?.shapeHandle,
 										lastOffset: stream?.lastOffset,
 										isUpToDate: stream?.isUpToDate,
@ -546,14 +602,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 									// NOTE: We keep this subscription active - don't unsubscribe!
 									// The stream is what Electric SQL uses for real-time updates
 									if (typeof stream?.subscribe === "function") {
-										console.log(
+										debugLog(
 											"[Electric] Subscribing to shape stream to watch for up-to-date message..."
 										);
 										// Subscribe but don't store unsubscribe - we want it to stay active
 										stream.subscribe((messages: unknown[]) => {
 											// Continue receiving updates even after sync is resolved
 											if (!syncResolved) {
-												console.log(
+												debugLog(
 													"[Electric] 🔵 Shape stream received messages:",
 													messages?.length || 0
 												);
@ -570,14 +626,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 														(typeof msg === "object" && "up-to-date" in msg)
 													) {
 														if (!syncResolved) {
-															console.log(`[Electric] ✅ Received up-to-date message for ${table}`);
+															debugLog(`[Electric] ✅ Received up-to-date message for ${table}`);
 															resolveInitialSync();
 														}
 														// Continue listening for real-time updates - don't return!
 													}
 												}
 												if (!syncResolved && messages.length > 0) {
-													console.log(
+													debugLog(
 														"[Electric] First message:",
 														JSON.stringify(messages[0], null, 2)
 													);
@ -586,16 +642,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {

 											// Also check stream's isUpToDate property after receiving messages
 											if (!syncResolved && stream?.isUpToDate) {
-												console.log(`[Electric] ✅ Stream isUpToDate is true for ${table}`);
+												debugLog(`[Electric] ✅ Stream isUpToDate is true for ${table}`);
 												resolveInitialSync();
 											}
 										});

 										// Also check stream's isUpToDate property immediately
 										if (stream?.isUpToDate) {
-											console.log(
-												`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`
-											);
+											debugLog(`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`);
 											resolveInitialSync();
 										}
 									}
@ -608,9 +662,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 										}

 										if (shape.isUpToDate || stream?.isUpToDate) {
-											console.log(
-												`[Electric] ✅ Sync completed (detected via polling) for ${table}`
-											);
+											debugLog(`[Electric] ✅ Sync completed (detected via polling) for ${table}`);
 											clearInterval(pollInterval);
 											resolveInitialSync();
 										}
@ -621,7 +673,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 										clearInterval(pollInterval);
 									});
 								} else {
-									console.warn(
+									debugWarn(
 										`[Electric] ⚠️ No stream available for ${table}, relying on callback and timeout`
 									);
 								}
@ -630,7 +682,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 							// Create the sync handle with proper cleanup
 							const syncHandle: SyncHandle = {
 								unsubscribe: () => {
-									console.log(`[Electric] Unsubscribing from: ${cacheKey}`);
+									debugLog(`[Electric] Unsubscribing from: ${cacheKey}`);
 									// Remove from cache first
 									activeSyncHandles.delete(cacheKey);
 									// Then unsubscribe from the shape
@ -648,7 +700,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {

 							// Cache the sync handle for reuse (memory optimization)
 							activeSyncHandles.set(cacheKey, syncHandle);
-							console.log(
+							debugLog(
 								`[Electric] Cached sync handle for: ${cacheKey} (total cached: ${activeSyncHandles.size})`
 							);

@ -660,7 +712,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								const response = await fetch(`${electricUrl}/v1/shape?table=${table}&offset=-1`, {
 									method: "GET",
 								});
-								console.log(
+								debugLog(
 									"[Electric] Electric SQL server response:",
 									response.status,
 									response.statusText
@ -682,14 +734,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 					// Clean up the pending sync when done (whether success or failure)
 					syncPromise.finally(() => {
 						pendingSyncs.delete(cacheKey);
-						console.log(`[Electric] Pending sync removed for: ${cacheKey}`);
+						debugLog(`[Electric] Pending sync removed for: ${cacheKey}`);
 					});

 					return syncPromise;
 				},
 			};

-			console.log(`[Electric] ✅ Initialized successfully for user: ${userId}`);
+			debugLog(`[Electric] ✅ Initialized successfully for user: ${userId}`);
 			return electricClient;
 		} catch (error) {
 			console.error("[Electric] Failed to initialize:", error);
@ -715,10 +767,10 @@ export async function cleanupElectric(): Promise<void> {
 	}

 	const userIdToClean = currentUserId;
-	console.log(`[Electric] Cleaning up for user: ${userIdToClean}`);
+	debugLog(`[Electric] Cleaning up for user: ${userIdToClean}`);

 	// Unsubscribe from all active sync handles first (memory cleanup)
-	console.log(`[Electric] Unsubscribing from ${activeSyncHandles.size} active sync handles`);
+	debugLog(`[Electric] Unsubscribing from ${activeSyncHandles.size} active sync handles`);
 	// Copy keys to array to avoid mutation during iteration
 	const handleKeys = Array.from(activeSyncHandles.keys());
 	for (const key of handleKeys) {
@ -727,7 +779,7 @@ export async function cleanupElectric(): Promise<void> {
 			try {
 				handle.unsubscribe();
 			} catch (err) {
-				console.warn(`[Electric] Failed to unsubscribe from ${key}:`, err);
+				debugWarn(`[Electric] Failed to unsubscribe from ${key}:`, err);
 			}
 		}
 	}
@ -738,7 +790,7 @@ export async function cleanupElectric(): Promise<void> {
 	try {
 		// Close the PGlite database connection
 		await electricClient.db.close();
-		console.log("[Electric] Database closed");
+		debugLog("[Electric] Database closed");
 	} catch (error) {
 		console.error("[Electric] Error closing database:", error);
 	}
@ -754,13 +806,13 @@ export async function cleanupElectric(): Promise<void> {
 		try {
 			const dbName = `${DB_PREFIX}${userIdToClean}-v${SYNC_VERSION}`;
 			window.indexedDB.deleteDatabase(dbName);
-			console.log(`[Electric] Deleted database: ${dbName}`);
+			debugLog(`[Electric] Deleted database: ${dbName}`);
 		} catch (err) {
-			console.warn("[Electric] Failed to delete database:", err);
+			debugWarn("[Electric] Failed to delete database:", err);
 		}
 	}

-	console.log("[Electric] Cleanup complete");
+	debugLog("[Electric] Cleanup complete");
 }

 /**
--- a/surfsense_web/messages/en.json
+++ b/surfsense_web/messages/en.json
@ -308,6 +308,7 @@
 		"no_rows_selected": "No rows selected",
 		"delete_success_count": "Successfully deleted {count} document(s)",
 		"delete_partial_failed": "Some documents could not be deleted",
+		"delete_success": "Document deleted successfully",
 		"delete_error": "Error deleting documents",
 		"filter_by_title": "Filter by title...",
 		"bulk_delete": "Delete Selected",
@ -328,7 +329,6 @@
 		"filter_placeholder": "Filter by title...",
 		"rows_per_page": "Rows per page",
 		"refresh": "Refresh",
-		"refresh_success": "Documents refreshed",
 		"upload_documents": "Upload Documents",
 		"create_shared_note": "Create Shared Note",
 		"processing_documents": "Processing documents...",
--- a/surfsense_web/messages/zh.json
+++ b/surfsense_web/messages/zh.json
@ -313,7 +313,6 @@
 		"filter_placeholder": "按标题筛选...",
 		"rows_per_page": "每页行数",
 		"refresh": "刷新",
-		"refresh_success": "文档已刷新",
 		"upload_documents": "上传文档",
 		"create_shared_note": "创建共享笔记",
 		"processing_documents": "正在处理文档...",