From 90f9fad95cb0dcc86faa26ae267a170abf90e470 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 12:55:38 +0530
Subject: [PATCH 01/36] feat: enhance document management with user information
 and connector dialog

---
 .../app/routes/documents_routes.py            |  28 +-
 surfsense_backend/app/schemas/documents.py    |   1 +
 .../[search_space_id]/client-layout.tsx       |   3 +
 .../(manage)/components/DocumentTypeIcon.tsx  |  33 +-
 .../(manage)/components/DocumentsFilters.tsx  | 354 +++++++------
 .../components/DocumentsTableShell.tsx        | 464 ++++++++++--------
 .../components/PaginationControls.tsx         | 193 +++-----
 .../(manage)/components/RowActions.tsx        |  38 +-
 .../documents/(manage)/components/types.ts    |   5 +-
 .../documents/(manage)/page.tsx               | 119 ++---
 .../connector-dialog.atoms.ts                 |   5 +
 .../assistant-ui/connector-popup.tsx          |  60 +--
 .../hooks/use-connector-dialog.ts             |   6 +-
 13 files changed, 665 insertions(+), 644 deletions(-)
 create mode 100644 surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts

diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py
index be90df459..d25a2db48 100644
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@@ -211,7 +211,11 @@ async def read_documents(
                 Permission.DOCUMENTS_READ.value,
                 "You don't have permission to read documents in this search space",
             )
-            query = select(Document).filter(Document.search_space_id == search_space_id)
+            query = (
+                select(Document)
+                .options(selectinload(Document.created_by))
+                .filter(Document.search_space_id == search_space_id)
+            )
             count_query = (
                 select(func.count())
                 .select_from(Document)
@@ -221,6 +225,7 @@ async def read_documents(
             # Get documents from all search spaces user has membership in
             query = (
                 select(Document)
+                .options(selectinload(Document.created_by))
                 .join(SearchSpace)
                 .join(SearchSpaceMembership)
                 .filter(SearchSpaceMembership.user_id == user.id)
@@ -261,6 +266,11 @@ async def read_documents(
         # Convert database objects to API-friendly format
         api_documents = []
         for doc in db_documents:
+            # Prefer the creator's display name; fall back to their email.
+            created_by_name = None
+            if doc.created_by:
+                created_by_name = doc.created_by.display_name or doc.created_by.email
+
             api_documents.append(
                 DocumentRead(
                     id=doc.id,
@@ -273,6 +283,8 @@ async def read_documents(
                     created_at=doc.created_at,
                     updated_at=doc.updated_at,
                     search_space_id=doc.search_space_id,
+                    created_by_id=doc.created_by_id,
+                    created_by_name=created_by_name,
                 )
             )
 
@@ -341,7 +353,11 @@ async def search_documents(
                 Permission.DOCUMENTS_READ.value,
                 "You don't have permission to read documents in this search space",
             )
-            query = select(Document).filter(Document.search_space_id == search_space_id)
+            query = (
+                select(Document)
+                .options(selectinload(Document.created_by))
+                .filter(Document.search_space_id == search_space_id)
+            )
             count_query = (
                 select(func.count())
                 .select_from(Document)
@@ -351,6 +367,7 @@ async def search_documents(
             # Get documents from all search spaces user has membership in
             query = (
                 select(Document)
+                .options(selectinload(Document.created_by))
                 .join(SearchSpace)
                 .join(SearchSpaceMembership)
                 .filter(SearchSpaceMembership.user_id == user.id)
@@ -395,6 +412,11 @@ async def search_documents(
         # Convert database objects to API-friendly format
         api_documents = []
         for doc in db_documents:
+            # Prefer the creator's display name; fall back to their email.
+            created_by_name = None
+            if doc.created_by:
+                created_by_name = doc.created_by.display_name or doc.created_by.email
+
             api_documents.append(
                 DocumentRead(
                     id=doc.id,
@@ -407,6 +429,8 @@ async def search_documents(
                     created_at=doc.created_at,
                     updated_at=doc.updated_at,
                     search_space_id=doc.search_space_id,
+                    created_by_id=doc.created_by_id,
+                    created_by_name=created_by_name,
                 )
             )
 
diff --git a/surfsense_backend/app/schemas/documents.py b/surfsense_backend/app/schemas/documents.py
index 1f82ae9ce..ad1907b90 100644
--- a/surfsense_backend/app/schemas/documents.py
+++ b/surfsense_backend/app/schemas/documents.py
@@ -53,6 +53,7 @@ class DocumentRead(BaseModel):
     updated_at: datetime | None
     search_space_id: int
     created_by_id: UUID | None = None  # User who created/uploaded this document
+    created_by_name: str | None = None  # Display name or email of the user who created this document
 
     model_config = ConfigDict(from_attributes=True)
 
diff --git a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx
index b9ddb9b74..83a579970 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx
@@ -13,6 +13,7 @@ import {
 	llmPreferencesAtom,
 } from "@/atoms/new-llm-config/new-llm-config-query.atoms";
 import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
+import { ConnectorIndicator } from "@/components/assistant-ui/connector-popup";
 import { DocumentUploadDialogProvider } from "@/components/assistant-ui/document-upload-popup";
 import { DashboardBreadcrumb } from "@/components/dashboard-breadcrumb";
 import { LayoutDataProvider } from "@/components/layout";
@@ -192,6 +193,8 @@ export function DashboardClientLayout({
 			<LayoutDataProvider searchSpaceId={searchSpaceId} breadcrumb={<DashboardBreadcrumb />}>
 				{children}
 			</LayoutDataProvider>
+			{/* Global connector dialog - triggered from documents page */}
+			<ConnectorIndicator hideTrigger />
 		</DocumentUploadDialogProvider>
 	);
 }
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
index e483dea12..246cff1c0 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
@@ -2,6 +2,7 @@
 
 import type React from "react";
 import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
+import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 
 export function getDocumentTypeIcon(type: string): React.ReactNode {
 	return getConnectorIcon(type);
@@ -14,17 +15,35 @@ export function getDocumentTypeLabel(type: string): string {
 		.join(" ");
 }
 
+const MAX_LABEL_LENGTH = 28;
+
 export function DocumentTypeChip({ type, className }: { type: string; className?: string }) {
 	const icon = getDocumentTypeIcon(type);
-	return (
+	const fullLabel = getDocumentTypeLabel(type);
+	// Decide once whether the label overflows MAX_LABEL_LENGTH; both the
+	// visible text and the tooltip branch below are driven by this flag.
+	const needsTruncation = fullLabel.length > MAX_LABEL_LENGTH;
+	const truncatedLabel = needsTruncation ? `${fullLabel.slice(0, MAX_LABEL_LENGTH)}...` : fullLabel;
+
+	const chip = (
 		<span
-			className={
-				"inline-flex items-center gap-1.5 rounded-full border border-border bg-primary/5 px-2 py-1 text-xs font-medium " +
-				(className ?? "")
-			}
+			className={`inline-flex items-center gap-1.5 rounded-md border border-border/50 bg-muted/30 px-2 py-0.5 text-xs font-medium text-muted-foreground ${className ?? ""}`}
 		>
-			<span className="text-primary">{icon}</span>
-			{getDocumentTypeLabel(type)}
+			<span className="opacity-70 flex-shrink-0">{icon}</span>
+			<span className="truncate">{truncatedLabel}</span>
 		</span>
 	);
+
+	if (needsTruncation) {
+		return (
+			<Tooltip>
+				<TooltipTrigger asChild>{chip}</TooltipTrigger>
+				<TooltipContent side="top" className="max-w-xs">
+					<p>{fullLabel}</p>
+				</TooltipContent>
+			</Tooltip>
+		);
+	}
+
+	return chip;
 }
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
index 67413d6f0..87d349e38 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
@@ -1,9 +1,20 @@
 "use client";
 
-import { CircleAlert, CircleX, Columns3, Filter, ListFilter, Trash } from "lucide-react";
-import { AnimatePresence, motion, type Variants } from "motion/react";
+import { useSetAtom } from "jotai";
+import {
+	CircleAlert,
+	CircleX,
+	Columns3,
+	FilePlus2,
+	FileType,
+	SlidersHorizontal,
+	Trash,
+} from "lucide-react";
+import { motion } from "motion/react";
 import { useTranslations } from "next-intl";
 import React, { useMemo, useRef } from "react";
+import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms";
+import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
 import {
 	AlertDialog,
 	AlertDialogAction,
@@ -17,25 +28,13 @@ import {
 } from "@/components/ui/alert-dialog";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
-import {
-	DropdownMenu,
-	DropdownMenuCheckboxItem,
-	DropdownMenuContent,
-	DropdownMenuLabel,
-	DropdownMenuTrigger,
-} from "@/components/ui/dropdown-menu";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
 import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
 import type { DocumentTypeEnum } from "@/contracts/types/document.types";
+import { getDocumentTypeIcon, getDocumentTypeLabel } from "./DocumentTypeIcon";
 import type { ColumnVisibility } from "./types";
 
-const fadeInScale: Variants = {
-	hidden: { opacity: 0, scale: 0.95 },
-	visible: { opacity: 1, scale: 1, transition: { type: "spring", stiffness: 300, damping: 30 } },
-	exit: { opacity: 0, scale: 0.95, transition: { duration: 0.15 } },
-};
-
 export function DocumentsFilters({
 	typeCounts: typeCountsRecord,
 	selectedIds,
@@ -61,6 +60,10 @@ export function DocumentsFilters({
 	const id = React.useId();
 	const inputRef = useRef<HTMLInputElement>(null);
 
+	// Dialog hooks for action buttons
+	const { openDialog: openUploadDialog } = useDocumentUploadDialog();
+	const setConnectorDialogOpen = useSetAtom(connectorDialogOpenAtom);
+
 	const uniqueTypes = useMemo(() => {
 		return Object.keys(typeCountsRecord).sort() as DocumentTypeEnum[];
 	}, [typeCountsRecord]);
@@ -75,14 +78,41 @@ export function DocumentsFilters({
 
 	return (
 		<motion.div
-			className="flex flex-wrap items-center justify-start gap-3 w-full"
+			className="flex flex-col gap-4"
 			initial={{ opacity: 0, y: 10 }}
 			animate={{ opacity: 1, y: 0 }}
 			transition={{ type: "spring", stiffness: 300, damping: 30, delay: 0.1 }}
 		>
-			<div className="flex items-center gap-3 flex-wrap w-full sm:w-auto">
+			{/* Main toolbar row */}
+			<div className="flex flex-wrap items-center gap-3">
+				{/* Action Buttons - Left Side */}
+				<div className="flex items-center gap-2">
+					<Button
+						onClick={openUploadDialog}
+						variant="outline"
+						size="sm"
+						className="h-9 gap-2 bg-white text-gray-700 border-white hover:bg-gray-50 dark:bg-white dark:text-gray-800 dark:hover:bg-gray-100"
+					>
+						<FilePlus2 size={16} />
+						<span>Upload documents</span>
+					</Button>
+					<Button
+						onClick={() => setConnectorDialogOpen(true)}
+						variant="outline"
+						size="sm"
+						className="h-9 gap-2 bg-white text-gray-700 border-white hover:bg-gray-50 dark:bg-white dark:text-gray-800 dark:hover:bg-gray-100"
+					>
+						<SlidersHorizontal size={16} />
+						<span>Manage connectors</span>
+					</Button>
+				</div>
+
+				{/* Spacer */}
+				<div className="flex-1" />
+
+				{/* Search Input */}
 				<motion.div
-					className="relative w-full sm:w-auto"
+					className="relative w-[180px]"
 					initial={{ opacity: 0, y: -10 }}
 					animate={{ opacity: 1, y: 0 }}
 					transition={{ type: "spring", stiffness: 300, damping: 30 }}
@@ -90,183 +120,199 @@ export function DocumentsFilters({
 					<Input
 						id={`${id}-input`}
 						ref={inputRef}
-						className="peer w-full sm:min-w-60 ps-9"
+						className="peer h-9 w-full pl-3 pr-9 text-sm bg-background border-border/60 focus-visible:ring-1 focus-visible:ring-ring/30"
 						value={searchValue}
 						onChange={(e) => onSearch(e.target.value)}
-						placeholder={t("filter_placeholder")}
+						placeholder="Filter by title"
 						type="text"
 						aria-label={t("filter_placeholder")}
 					/>
-					<motion.div
-						className="pointer-events-none absolute inset-y-0 start-0 flex items-center justify-center ps-3 text-muted-foreground/80 peer-disabled:opacity-50"
-						initial={{ scale: 0.8 }}
-						animate={{ scale: 1 }}
-						transition={{ delay: 0.1 }}
-					>
-						<ListFilter size={16} strokeWidth={2} aria-hidden="true" />
-					</motion.div>
 					{Boolean(searchValue) && (
 						<motion.button
-							className="absolute inset-y-0 end-0 flex h-full w-9 items-center justify-center rounded-e-lg text-muted-foreground/80 outline-offset-2 transition-colors hover:text-foreground focus:z-10 focus-visible:outline focus-visible:outline-ring/70"
+							className="absolute inset-y-0 right-0 flex h-full w-9 items-center justify-center rounded-r-md text-muted-foreground/60 hover:text-foreground transition-colors"
 							aria-label="Clear filter"
 							onClick={() => {
 								onSearch("");
 								inputRef.current?.focus();
 							}}
-							initial={{ opacity: 0, rotate: -90 }}
-							animate={{ opacity: 1, rotate: 0 }}
-							exit={{ opacity: 0, rotate: 90 }}
+							initial={{ opacity: 0, scale: 0.8 }}
+							animate={{ opacity: 1, scale: 1 }}
+							exit={{ opacity: 0, scale: 0.8 }}
 							whileHover={{ scale: 1.1 }}
 							whileTap={{ scale: 0.9 }}
 						>
-							<CircleX size={16} strokeWidth={2} aria-hidden="true" />
+							<CircleX size={14} strokeWidth={2} aria-hidden="true" />
 						</motion.button>
 					)}
 				</motion.div>
 
-				<Popover>
-					<PopoverTrigger asChild>
-						<motion.div
-							whileHover={{ scale: 1.05 }}
-							whileTap={{ scale: 0.95 }}
-							transition={{ type: "spring", stiffness: 400, damping: 17 }}
-						>
-							<Button variant="outline">
-								<Filter
-									className="-ms-1 me-2 opacity-60"
-									size={16}
-									strokeWidth={2}
-									aria-hidden="true"
-								/>
-								Type
+				{/* Filter Buttons Group */}
+				<div className="flex items-center gap-2 flex-wrap">
+					{/* Type Filter */}
+					<Popover>
+						<PopoverTrigger asChild>
+							<Button
+								variant="outline"
+								size="sm"
+								className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
+							>
+								<FileType size={14} className="text-muted-foreground" />
+								<span className="hidden sm:inline">Type</span>
 								{activeTypes.length > 0 && (
-									<motion.span
-										initial={{ scale: 0.8 }}
-										animate={{ scale: 1 }}
-										className="-me-1 ms-3 inline-flex h-5 max-h-full items-center rounded border border-border bg-background px-1 text-[0.625rem] font-medium text-muted-foreground/70"
-									>
+									<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
 										{activeTypes.length}
-									</motion.span>
+									</span>
 								)}
 							</Button>
-						</motion.div>
-					</PopoverTrigger>
-					<PopoverContent className="min-w-36 p-3" align="start">
-						<motion.div initial="hidden" animate="visible" exit="exit" variants={fadeInScale}>
-							<div className="space-y-3">
-								<div className="text-xs font-medium text-muted-foreground">Filters</div>
-								<div className="space-y-3">
-									<AnimatePresence>
-										{uniqueTypes.map((value: DocumentTypeEnum, i) => (
-											<motion.div
-												key={value}
-												className="flex items-center gap-2"
-												initial={{ opacity: 0, y: -5 }}
-												animate={{ opacity: 1, y: 0 }}
-												exit={{ opacity: 0, y: 5 }}
-												transition={{ delay: i * 0.05 }}
+						</PopoverTrigger>
+						<PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
+							<div className="px-2.5 pt-3">
+								<div className="mb-1.5 px-1 text-[11px] font-medium text-muted-foreground">
+									Filter by source
+								</div>
+								<div className="space-y-0.5 max-h-[300px] overflow-y-auto overflow-x-hidden">
+									{uniqueTypes.map((value: DocumentTypeEnum, i) => (
+										<button
+											key={value}
+											type="button"
+											className="flex w-full items-center gap-2 py-1 px-2.5 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
+											onClick={() => onToggleType(value, !activeTypes.includes(value))}
+										>
+											<Checkbox
+												id={`${id}-${i}`}
+												checked={activeTypes.includes(value)}
+												onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
+												className="h-3.5 w-3.5 flex-shrink-0 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+											/>
+											<Label
+												htmlFor={`${id}-${i}`}
+												className="flex flex-1 items-center gap-2 font-normal text-xs cursor-pointer min-w-0"
 											>
-												<Checkbox
-													id={`${id}-${i}`}
-													checked={activeTypes.includes(value)}
-													onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
-												/>
-												<Label
-													htmlFor={`${id}-${i}`}
-													className="flex grow justify-between gap-2 font-normal"
-												>
-													{value}{" "}
-													<span className="ms-2 text-xs text-muted-foreground">
-														{typeCounts.get(value)}
-													</span>
-												</Label>
-											</motion.div>
-										))}
-									</AnimatePresence>
+												<span className="opacity-60 flex-shrink-0">{getDocumentTypeIcon(value)}</span>
+												<span className="truncate min-w-0">{getDocumentTypeLabel(value)}</span>
+												<span className="text-[10px] text-muted-foreground/70 tabular-nums flex-shrink-0 ml-auto">
+													{typeCounts.get(value)}
+												</span>
+											</Label>
+										</button>
+									))}
+								</div>
+								{activeTypes.length > 0 && (
+									<div className="mt-1 pt-1 pb-1 border-t border-border/50">
+										<Button
+											variant="ghost"
+											size="sm"
+											className="w-full h-6 text-[11px]"
+											onClick={() => {
+												activeTypes.forEach((activeType) => {
+													onToggleType(activeType, false);
+												});
+											}}
+										>
+											Clear filters
+										</Button>
+									</div>
+								)}
+							</div>
+						</PopoverContent>
+					</Popover>
+
+					{/* View/Columns Popover */}
+					<Popover>
+						<PopoverTrigger asChild>
+							<Button
+								variant="outline"
+								size="sm"
+								className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
+							>
+								<Columns3 size={14} className="text-muted-foreground" />
+								<span className="hidden sm:inline">View</span>
+							</Button>
+						</PopoverTrigger>
+						<PopoverContent className="w-36 !p-0 overflow-hidden" align="end">
+							<div className="px-2.5 pt-3 pb-2">
+								<div className="mb-1.5 px-1 text-[11px] font-medium text-muted-foreground">
+									Toggle columns
+								</div>
+								<div className="space-y-0.5">
+									{(
+										[
+											["document_type", "Source"],
+											["created_by", "User"],
+											["created_at", "Created"],
+										] as Array<[keyof ColumnVisibility, string]>
+									).map(([key, label], i) => (
+										<button
+											key={key}
+											type="button"
+											className="flex w-full items-center gap-2 py-1 px-2.5 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
+											onClick={() => onToggleColumn(key, !columnVisibility[key])}
+										>
+											<Checkbox
+												id={`${id}-col-${i}`}
+												checked={columnVisibility[key]}
+												onCheckedChange={(checked: boolean) => onToggleColumn(key, !!checked)}
+												className="h-3.5 w-3.5 flex-shrink-0 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+											/>
+											<Label
+												htmlFor={`${id}-col-${i}`}
+												className="flex flex-1 items-center gap-2 font-normal text-xs cursor-pointer min-w-0"
+											>
+												<span className="truncate min-w-0">{label}</span>
+											</Label>
+										</button>
+									))}
 								</div>
 							</div>
-						</motion.div>
-					</PopoverContent>
-				</Popover>
+						</PopoverContent>
+					</Popover>
+				</div>
 
-				<DropdownMenu>
-					<DropdownMenuTrigger asChild>
-						<motion.div
-							whileHover={{ scale: 1.05 }}
-							whileTap={{ scale: 0.95 }}
-							transition={{ type: "spring", stiffness: 400, damping: 17 }}
-						>
-							<Button variant="outline">
-								<Columns3
-									className="-ms-1 me-2 opacity-60"
-									size={16}
-									strokeWidth={2}
-									aria-hidden="true"
-								/>
-								View
-							</Button>
-						</motion.div>
-					</DropdownMenuTrigger>
-					<DropdownMenuContent align="end">
-						<DropdownMenuLabel>Toggle columns</DropdownMenuLabel>
-						{(
-							[
-								["title", "Title"],
-								["document_type", "Type"],
-								["content", "Content"],
-								["created_at", "Created At"],
-							] as Array<[keyof ColumnVisibility, string]>
-						).map(([key, label]) => (
-							<DropdownMenuCheckboxItem
-								key={key}
-								className="capitalize"
-								checked={columnVisibility[key]}
-								onCheckedChange={(v) => onToggleColumn(key, !!v)}
-								onSelect={(e) => e.preventDefault()}
-							>
-								{label}
-							</DropdownMenuCheckboxItem>
-						))}
-					</DropdownMenuContent>
-				</DropdownMenu>
-			</div>
-
-			<div className="flex items-center gap-3 w-full sm:w-auto sm:ml-auto">
+				{/* Bulk Delete Button */}
 				{selectedIds.size > 0 && (
 					<AlertDialog>
 						<AlertDialogTrigger asChild>
-							<Button className="w-full sm:w-auto" variant="outline">
-								<Trash
-									className="-ms-1 me-2 opacity-60"
-									size={16}
-									strokeWidth={2}
-									aria-hidden="true"
-								/>
-								Delete
-								<span className="-me-1 ms-3 inline-flex h-5 max-h-full items-center rounded border border-border bg-background px-1 text-[0.625rem] font-medium text-muted-foreground/70">
-									{selectedIds.size}
-								</span>
-							</Button>
+							<motion.div
+								initial={{ opacity: 0, scale: 0.9 }}
+								animate={{ opacity: 1, scale: 1 }}
+								exit={{ opacity: 0, scale: 0.9 }}
+							>
+								<Button
+									variant="destructive"
+									size="sm"
+									className="h-9 gap-2"
+								>
+									<Trash size={14} />
+									Delete
+									<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
+										{selectedIds.size}
+									</span>
+								</Button>
+							</motion.div>
 						</AlertDialogTrigger>
-						<AlertDialogContent>
-							<div className="flex flex-col gap-2 max-sm:items-center sm:flex-row sm:gap-4">
+						<AlertDialogContent className="max-w-md">
+							<div className="flex flex-col gap-2 sm:flex-row sm:gap-4">
 								<div
-									className="flex size-9 shrink-0 items-center justify-center rounded-full border border-border"
+									className="flex size-10 shrink-0 items-center justify-center rounded-full bg-destructive/10 text-destructive"
 									aria-hidden="true"
 								>
-									<CircleAlert className="opacity-80" size={16} strokeWidth={2} />
+									<CircleAlert size={18} strokeWidth={2} />
 								</div>
-								<AlertDialogHeader>
-									<AlertDialogTitle>Are you absolutely sure?</AlertDialogTitle>
+								<AlertDialogHeader className="flex-1">
+									<AlertDialogTitle>Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?</AlertDialogTitle>
 									<AlertDialogDescription>
-										This action cannot be undone. This will permanently delete {selectedIds.size}{" "}
-										selected {selectedIds.size === 1 ? "row" : "rows"}.
+										This action cannot be undone. This will permanently delete the selected {selectedIds.size === 1 ? "document" : "documents"} from your search space.
 									</AlertDialogDescription>
 								</AlertDialogHeader>
 							</div>
 							<AlertDialogFooter>
 								<AlertDialogCancel>Cancel</AlertDialogCancel>
-								<AlertDialogAction onClick={onBulkDelete}>Delete</AlertDialogAction>
+								<AlertDialogAction
+									onClick={onBulkDelete}
+									className="bg-destructive text-destructive-foreground hover:bg-destructive/90"
+								>
+									Delete
+								</AlertDialogAction>
 							</AlertDialogFooter>
 						</AlertDialogContent>
 					</AlertDialog>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index d9908f46c..faa7605a3 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -4,9 +4,10 @@ import { ChevronDown, ChevronUp, FileX, Plus } from "lucide-react";
 import { motion } from "motion/react";
 import { useParams } from "next/navigation";
 import { useTranslations } from "next-intl";
-import React from "react";
+import React, { useState } from "react";
 import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
 import { DocumentViewer } from "@/components/document-viewer";
+import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
 import { Spinner } from "@/components/ui/spinner";
@@ -19,7 +20,7 @@ import {
 	TableRow,
 } from "@/components/ui/table";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
-import { DocumentTypeChip, getDocumentTypeIcon } from "./DocumentTypeIcon";
+import { DocumentTypeChip } from "./DocumentTypeIcon";
 import { RowActions } from "./RowActions";
 import type { ColumnVisibility, Document } from "./types";
 
@@ -36,13 +37,45 @@ function sortDocuments(docs: Document[], key: SortKey, desc: boolean): Document[
 	return desc ? sorted.reverse() : sorted;
 }
 
-function truncate(text: string, len = 150): string {
-	const plain = text
-		.replace(/[#*_`>\-[\]()]+/g, " ")
-		.replace(/\s+/g, " ")
-		.trim();
-	if (plain.length <= len) return plain;
-	return `${plain.slice(0, len)}...`;
+// Format a timestamp string as a long en-US date, e.g. "February 4, 2026".
+function formatDate(dateStr: string): string {
+	const date = new Date(dateStr);
+	// Unparseable input would render as "Invalid Date"; show the raw value instead.
+	if (Number.isNaN(date.getTime())) return dateStr;
+	const parts = { year: "numeric", month: "long", day: "numeric" } as const;
+	return date.toLocaleDateString("en-US", parts);
+}
+
+// Column-header button: clicking it calls onSort with this column's sort key.
+function SortableHeader({
+	children,
+	sortKey,
+	currentSortKey,
+	sortDesc,
+	onSort,
+}: {
+	children: React.ReactNode;
+	sortKey: SortKey;
+	currentSortKey: SortKey;
+	sortDesc: boolean;
+	onSort: (key: SortKey) => void;
+}) {
+	const isActive = currentSortKey === sortKey;
+	// Down chevron only when active and descending; inactive arrows show on hover.
+	const Arrow = isActive && sortDesc ? ChevronDown : ChevronUp;
+	const arrowOpacity = isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50";
+	return (
+		<button
+			type="button"
+			onClick={() => onSort(sortKey)}
+			className="flex items-center gap-1.5 text-left font-medium text-muted-foreground hover:text-foreground transition-colors group"
+		>
+			{children}
+			<span className={`transition-opacity ${arrowOpacity}`}>
+				<Arrow size={14} />
+			</span>
+		</button>
+	);
 }
 
 export function DocumentsTableShell({
@@ -75,6 +108,9 @@ export function DocumentsTableShell({
 	const searchSpaceId = params.search_space_id;
 	const { openDialog } = useDocumentUploadDialog();
 
+	// State for metadata viewer (opened via Ctrl/Cmd+Click)
+	const [metadataDoc, setMetadataDoc] = useState<Document | null>(null);
+
 	const sorted = React.useMemo(
 		() => sortDocuments(documents, sortKey, sortDesc),
 		[documents, sortKey, sortDesc]
@@ -107,23 +143,23 @@ export function DocumentsTableShell({
 
 	return (
 		<motion.div
-			className="rounded-md border mt-6 overflow-hidden"
+			className="rounded-xl border border-border/50 bg-card/30 backdrop-blur-sm overflow-hidden shadow-sm"
 			initial={{ opacity: 0, y: 20 }}
 			animate={{ opacity: 1, y: 0 }}
 			transition={{ type: "spring", stiffness: 300, damping: 30, delay: 0.2 }}
 		>
 			{loading ? (
 				<div className="flex h-[400px] w-full items-center justify-center">
-					<div className="flex flex-col items-center gap-2">
+					<div className="flex flex-col items-center gap-3">
 						<Spinner size="lg" className="text-primary" />
 						<p className="text-sm text-muted-foreground">{t("loading")}</p>
 					</div>
 				</div>
 			) : error ? (
 				<div className="flex h-[400px] w-full items-center justify-center">
-					<div className="flex flex-col items-center gap-2">
+					<div className="flex flex-col items-center gap-3">
 						<p className="text-sm text-destructive">{t("error_loading")}</p>
-						<Button variant="outline" size="sm" onClick={() => onRefresh()} className="mt-2">
+						<Button variant="outline" size="sm" onClick={() => onRefresh()}>
 							{t("retry")}
 						</Button>
 					</div>
@@ -136,10 +172,10 @@ export function DocumentsTableShell({
 						transition={{ duration: 0.4 }}
 						className="flex flex-col items-center gap-4 max-w-md px-4 text-center"
 					>
-						<div className="rounded-full bg-muted p-4">
-							<FileX className="h-8 w-8 text-muted-foreground" />
+						<div className="rounded-full bg-muted/50 p-4">
+							<FileX className="h-8 w-8 text-muted-foreground/60" />
 						</div>
-						<div className="space-y-2">
+						<div className="space-y-1.5">
 							<h3 className="text-lg font-semibold">{t("no_documents")}</h3>
 							<p className="text-sm text-muted-foreground">
 								Get started by uploading your first document.
@@ -153,218 +189,232 @@ export function DocumentsTableShell({
 				</div>
 			) : (
 				<>
-					<div className="hidden md:block max-h-[60vh] overflow-auto">
-						<Table className="table-fixed w-full">
-							<TableHeader className="sticky top-0 bg-background">
-								<TableRow className="hover:bg-transparent">
-									<TableHead style={{ width: 28 }}>
+					{/* Desktop Table View */}
+					<div className="hidden md:flex md:flex-col">
+						{/* Fixed Header */}
+						<Table>
+							<TableHeader>
+								<TableRow className="bg-muted/30 hover:bg-muted/30 border-b border-border/50">
+									<TableHead className="w-[40px] pl-4">
 										<Checkbox
 											checked={allSelectedOnPage || (someSelectedOnPage && "indeterminate")}
 											onCheckedChange={(v) => toggleAll(!!v)}
 											aria-label="Select all"
+											className="data-[state=checked]:bg-primary data-[state=checked]:border-primary"
 										/>
 									</TableHead>
-									{columnVisibility.title && (
-										<TableHead style={{ width: 250 }}>
-											<Button
-												variant="ghost"
-												className="flex h-full w-full cursor-pointer select-none items-center justify-between gap-2"
-												onClick={() => onSortHeader("title")}
-											>
-												{t("title")}
-												{sortKey === "title" ? (
-													sortDesc ? (
-														<ChevronDown className="shrink-0 opacity-60" size={16} />
-													) : (
-														<ChevronUp className="shrink-0 opacity-60" size={16} />
-													)
-												) : null}
-											</Button>
-										</TableHead>
-									)}
+									<TableHead className="min-w-[200px]">
+										<SortableHeader
+											sortKey="title"
+											currentSortKey={sortKey}
+											sortDesc={sortDesc}
+											onSort={onSortHeader}
+										>
+											Document
+										</SortableHeader>
+									</TableHead>
 									{columnVisibility.document_type && (
-										<TableHead style={{ width: 180 }}>
-											<Button
-												variant="ghost"
-												className="flex h-full w-full cursor-pointer select-none items-center justify-between gap-2"
-												onClick={() => onSortHeader("document_type")}
+										<TableHead className="w-[160px]">
+											<SortableHeader
+												sortKey="document_type"
+												currentSortKey={sortKey}
+												sortDesc={sortDesc}
+												onSort={onSortHeader}
 											>
-												{t("type")}
-												{sortKey === "document_type" ? (
-													sortDesc ? (
-														<ChevronDown className="shrink-0 opacity-60" size={16} />
-													) : (
-														<ChevronUp className="shrink-0 opacity-60" size={16} />
-													)
-												) : null}
-											</Button>
+												Source
+											</SortableHeader>
 										</TableHead>
 									)}
-									{columnVisibility.content && (
-										<TableHead style={{ width: 300 }}>{t("content_summary")}</TableHead>
+									{columnVisibility.created_by && (
+										<TableHead className="w-[150px]">
+											<span className="text-muted-foreground font-medium">User</span>
+										</TableHead>
 									)}
 									{columnVisibility.created_at && (
-										<TableHead style={{ width: 120 }}>
-											<Button
-												variant="ghost"
-												className="flex h-full w-full cursor-pointer select-none items-center justify-between gap-2"
-												onClick={() => onSortHeader("created_at")}
+										<TableHead className="w-[150px]">
+											<SortableHeader
+												sortKey="created_at"
+												currentSortKey={sortKey}
+												sortDesc={sortDesc}
+												onSort={onSortHeader}
 											>
-												Created At
-												{sortKey === "created_at" ? (
-													sortDesc ? (
-														<ChevronDown className="shrink-0 opacity-60" size={16} />
-													) : (
-														<ChevronUp className="shrink-0 opacity-60" size={16} />
-													)
-												) : null}
-											</Button>
+												Created
+											</SortableHeader>
 										</TableHead>
 									)}
-									<TableHead style={{ width: 60 }}>
+									<TableHead className="w-[80px] pr-4">
 										<span className="sr-only">Actions</span>
 									</TableHead>
 								</TableRow>
 							</TableHeader>
-							<TableBody>
-								{sorted.map((doc, index) => {
-									const icon = getDocumentTypeIcon(doc.document_type);
-									const title = doc.title;
-									const truncatedTitle = title.length > 30 ? `${title.slice(0, 30)}...` : title;
-									return (
-										<motion.tr
-											key={doc.id}
-											initial={{ opacity: 0, y: 10 }}
-											animate={{
-												opacity: 1,
-												y: 0,
-												transition: {
-													type: "spring",
-													stiffness: 300,
-													damping: 30,
-													delay: index * 0.03,
-												},
-											}}
-											exit={{ opacity: 0, y: -10 }}
-											className="border-b transition-colors hover:bg-muted/50"
-										>
-											<TableCell className="px-4 py-3">
-												<Checkbox
-													checked={selectedIds.has(doc.id)}
-													onCheckedChange={(v) => toggleOne(doc.id, !!v)}
-													aria-label="Select row"
-												/>
-											</TableCell>
-											{columnVisibility.title && (
-												<TableCell className="px-4 py-3">
-													<motion.div
-														className="flex items-center gap-2 font-medium"
-														whileHover={{ scale: 1.02 }}
-														transition={{ type: "spring", stiffness: 300 }}
-														style={{ display: "flex" }}
-													>
-														<Tooltip>
-															<TooltipTrigger asChild>
-																<span className="flex items-center gap-2">
-																	<span className="text-muted-foreground shrink-0">{icon}</span>
-																	<span>{truncatedTitle}</span>
-																</span>
-															</TooltipTrigger>
-															<TooltipContent>
-																<p>{title}</p>
-															</TooltipContent>
-														</Tooltip>
-													</motion.div>
-												</TableCell>
-											)}
-											{columnVisibility.document_type && (
-												<TableCell className="px-4 py-3">
-													<div className="flex items-center gap-2">
-														<DocumentTypeChip type={doc.document_type} />
-													</div>
-												</TableCell>
-											)}
-											{columnVisibility.content && (
-												<TableCell className="px-4 py-3">
-													<div className="flex flex-col gap-2">
-														<div className="max-w-[300px] max-h-[60px] overflow-hidden text-sm text-muted-foreground">
-															{truncate(doc.content)}
-														</div>
-														<DocumentViewer
-															title={doc.title}
-															content={doc.content}
-															trigger={
-																<Button variant="ghost" size="sm" className="w-fit text-xs">
-																	{t("view_full")}
-																</Button>
-															}
-														/>
-													</div>
-												</TableCell>
-											)}
-											{columnVisibility.created_at && (
-												<TableCell className="px-4 py-3">
-													{new Date(doc.created_at).toLocaleDateString()}
-												</TableCell>
-											)}
-											<TableCell className="px-4 py-3">
-												<RowActions
-													document={doc}
-													deleteDocument={deleteDocument}
-													refreshDocuments={async () => {
-														await onRefresh();
-													}}
-													searchSpaceId={searchSpaceId as string}
-												/>
-											</TableCell>
-										</motion.tr>
-									);
-								})}
-							</TableBody>
 						</Table>
+						{/* Scrollable Body */}
+						<div className="max-h-[55vh] overflow-auto">
+							<Table>
+								<TableBody>
+									{sorted.map((doc, index) => {
+										const title = doc.title;
+										const truncatedTitle = title.length > 50 ? `${title.slice(0, 50)}...` : title;
+										const isSelected = selectedIds.has(doc.id);
+										return (
+											<motion.tr
+												key={doc.id}
+												initial={{ opacity: 0 }}
+												animate={{
+													opacity: 1,
+													transition: {
+														duration: 0.2,
+														delay: index * 0.02,
+													},
+												}}
+												className={`border-b border-border/30 transition-colors ${
+													isSelected
+														? "bg-primary/5 hover:bg-primary/10"
+														: "hover:bg-muted/40"
+												}`}
+											>
+												<TableCell className="w-[40px] pl-4 py-3">
+													<Checkbox
+														checked={isSelected}
+														onCheckedChange={(v) => toggleOne(doc.id, !!v)}
+														aria-label="Select row"
+														className="data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+													/>
+												</TableCell>
+												<TableCell className="min-w-[200px] py-3">
+													<DocumentViewer
+														title={doc.title}
+														content={doc.content}
+														trigger={
+															<button
+																type="button"
+																className="text-left font-medium text-foreground/90 hover:text-primary transition-colors cursor-pointer bg-transparent border-0 p-0"
+																onClick={(e) => {
+																	// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
+																	if (e.ctrlKey || e.metaKey) {
+																		e.preventDefault();
+																		e.stopPropagation();
+																		setMetadataDoc(doc);
+																	}
+																}}
+																onKeyDown={(e) => {
+																	// Ctrl/Cmd + Enter opens metadata
+																	if ((e.ctrlKey || e.metaKey) && e.key === "Enter") {
+																		e.preventDefault();
+																		setMetadataDoc(doc);
+																	}
+																}}
+															>
+																{title.length > 50 ? (
+																	<Tooltip>
+																		<TooltipTrigger asChild>
+																			<span>{truncatedTitle}</span>
+																		</TooltipTrigger>
+																		<TooltipContent side="top" className="max-w-xs">
+																			<p className="break-words">{title}</p>
+																		</TooltipContent>
+																	</Tooltip>
+																) : (
+																	title
+																)}
+															</button>
+														}
+													/>
+												</TableCell>
+												{columnVisibility.document_type && (
+													<TableCell className="w-[160px] py-3">
+														<DocumentTypeChip type={doc.document_type} />
+													</TableCell>
+												)}
+												{columnVisibility.created_by && (
+													<TableCell className="w-[150px] py-3 text-sm text-muted-foreground truncate">
+														{doc.created_by_name || "—"}
+													</TableCell>
+												)}
+												{columnVisibility.created_at && (
+													<TableCell className="w-[150px] py-3 text-sm text-muted-foreground">
+														{formatDate(doc.created_at)}
+													</TableCell>
+												)}
+												<TableCell className="w-[80px] pr-4 py-3">
+													<RowActions
+														document={doc}
+														deleteDocument={deleteDocument}
+														refreshDocuments={async () => {
+															await onRefresh();
+														}}
+														searchSpaceId={searchSpaceId as string}
+													/>
+												</TableCell>
+											</motion.tr>
+										);
+									})}
+								</TableBody>
+							</Table>
+						</div>
 					</div>
-					<div className="md:hidden divide-y">
-						{sorted.map((doc) => {
-							const icon = getDocumentTypeIcon(doc.document_type);
+
+					{/* Mobile Card View */}
+					<div className="md:hidden divide-y divide-border/30">
+						{sorted.map((doc, index) => {
+							const isSelected = selectedIds.has(doc.id);
 							return (
-								<div key={doc.id} className="p-3">
-									<div className="flex items-center gap-3">
+								<motion.div
+									key={doc.id}
+									initial={{ opacity: 0 }}
+									animate={{ opacity: 1, transition: { delay: index * 0.03 } }}
+									className={`p-4 transition-colors ${
+										isSelected ? "bg-primary/5" : "hover:bg-muted/30"
+									}`}
+								>
+									<div className="flex items-start gap-3">
 										<Checkbox
-											checked={selectedIds.has(doc.id)}
+											checked={isSelected}
 											onCheckedChange={(v) => toggleOne(doc.id, !!v)}
 											aria-label="Select row"
+											className="mt-0.5 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
 										/>
-										<div className="flex-1 min-w-0">
-											<div className="flex items-center gap-2 min-w-0">
-												<span className="text-muted-foreground shrink-0">{icon}</span>
-												<div className="font-medium truncate">{doc.title}</div>
-											</div>
-											<div className="mt-1 flex flex-wrap items-center gap-2">
-												<DocumentTypeChip type={doc.document_type} />
-												<span className="text-xs text-muted-foreground">
-													{new Date(doc.created_at).toLocaleDateString()}
-												</span>
-											</div>
-											{columnVisibility.content && (
-												<div className="mt-2 text-sm text-muted-foreground">
-													{truncate(doc.content)}
-													<div className="mt-1">
-														<DocumentViewer
-															title={doc.title}
-															content={doc.content}
-															trigger={
-																<Button
-																	variant="ghost"
-																	size="sm"
-																	className="w-fit text-xs p-0 h-auto"
-																>
-																	{t("view_full")}
-																</Button>
+										<div className="flex-1 min-w-0 space-y-2">
+											<DocumentViewer
+												title={doc.title}
+												content={doc.content}
+												trigger={
+													<button
+														type="button"
+														className="text-left font-medium text-sm text-foreground/90 hover:text-primary transition-colors cursor-pointer truncate block w-full bg-transparent border-0 p-0"
+														onClick={(e) => {
+															// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
+															if (e.ctrlKey || e.metaKey) {
+																e.preventDefault();
+																e.stopPropagation();
+																setMetadataDoc(doc);
 															}
-														/>
-													</div>
-												</div>
-											)}
+														}}
+														onKeyDown={(e) => {
+															// Ctrl/Cmd + Enter opens metadata
+															if ((e.ctrlKey || e.metaKey) && e.key === "Enter") {
+																e.preventDefault();
+																setMetadataDoc(doc);
+															}
+														}}
+													>
+														{doc.title}
+													</button>
+												}
+											/>
+											<div className="flex flex-wrap items-center gap-2">
+												<DocumentTypeChip type={doc.document_type} />
+												{columnVisibility.created_by && doc.created_by_name && (
+													<span className="text-xs text-muted-foreground">
+														{doc.created_by_name}
+													</span>
+												)}
+												{columnVisibility.created_at && (
+													<span className="text-xs text-muted-foreground">
+														{formatDate(doc.created_at)}
+													</span>
+												)}
+											</div>
 										</div>
 										<RowActions
 											document={doc}
@@ -375,12 +425,22 @@ export function DocumentsTableShell({
 											searchSpaceId={searchSpaceId as string}
 										/>
 									</div>
-								</div>
+								</motion.div>
 							);
 						})}
 					</div>
 				</>
 			)}
+
+			{/* Metadata Viewer - opened via Ctrl/Cmd+Click on document title */}
+			<JsonMetadataViewer
+				title={metadataDoc?.title ?? ""}
+				metadata={metadataDoc?.document_metadata}
+				open={!!metadataDoc}
+				onOpenChange={(open) => {
+					if (!open) setMetadataDoc(null);
+				}}
+			/>
 		</motion.div>
 	);
 }
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/PaginationControls.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/PaginationControls.tsx
index d87fa2dc9..bd8a9f1cc 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/PaginationControls.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/PaginationControls.tsx
@@ -2,164 +2,89 @@
 
 import { ChevronFirst, ChevronLast, ChevronLeft, ChevronRight } from "lucide-react";
 import { motion } from "motion/react";
-import { useTranslations } from "next-intl";
 import { Button } from "@/components/ui/button";
-import { Label } from "@/components/ui/label";
-import { Pagination, PaginationContent, PaginationItem } from "@/components/ui/pagination";
-import {
-	Select,
-	SelectContent,
-	SelectItem,
-	SelectTrigger,
-	SelectValue,
-} from "@/components/ui/select";
+
+const PAGE_SIZE = 50;
 
 export function PaginationControls({
 	pageIndex,
-	pageSize,
 	total,
-	onPageSizeChange,
 	onFirst,
 	onPrev,
 	onNext,
 	onLast,
 	canPrev,
 	canNext,
-	id,
 }: {
 	pageIndex: number;
-	pageSize: number;
 	total: number;
-	onPageSizeChange: (size: number) => void;
 	onFirst: () => void;
 	onPrev: () => void;
 	onNext: () => void;
 	onLast: () => void;
 	canPrev: boolean;
 	canNext: boolean;
-	id: string;
 }) {
-	const t = useTranslations("documents");
-	const start = total === 0 ? 0 : pageIndex * pageSize + 1;
-	const end = Math.min((pageIndex + 1) * pageSize, total);
+	const start = total === 0 ? 0 : pageIndex * PAGE_SIZE + 1; // empty list reads "0-0 of 0", not "1-0 of 0"
+	const end = Math.min((pageIndex + 1) * PAGE_SIZE, total);
 
 	return (
-		<div className="flex items-center justify-between gap-8 mt-6">
-			<motion.div
-				className="flex items-center gap-3"
-				initial={{ opacity: 0, x: -20 }}
-				animate={{ opacity: 1, x: 0 }}
-				transition={{ type: "spring", stiffness: 300, damping: 30 }}
-			>
-				<Label htmlFor={id} className="max-sm:sr-only">
-					{t("rows_per_page")}
-				</Label>
-				<Select value={String(pageSize)} onValueChange={(v) => onPageSizeChange(Number(v))}>
-					<SelectTrigger id={id} className="w-fit whitespace-nowrap">
-						<SelectValue placeholder="Select number of results" />
-					</SelectTrigger>
-					<SelectContent>
-						{[5, 10, 25, 50].map((s) => (
-							<SelectItem key={s} value={String(s)}>
-								{s}
-							</SelectItem>
-						))}
-					</SelectContent>
-				</Select>
-			</motion.div>
+		<motion.div
+			className="flex items-center justify-end gap-3 py-3 px-2"
+			initial={{ opacity: 0, y: 10 }}
+			animate={{ opacity: 1, y: 0 }}
+			transition={{ type: "spring", stiffness: 300, damping: 30, delay: 0.3 }}
+		>
+			{/* Range indicator */}
+			<span className="text-sm text-muted-foreground tabular-nums">
+				{start}-{end} of {total}
+			</span>
 
-			<motion.div
-				className="flex grow justify-end whitespace-nowrap text-sm text-muted-foreground"
-				initial={{ opacity: 0 }}
-				animate={{ opacity: 1 }}
-				transition={{ delay: 0.2 }}
-			>
-				<p className="whitespace-nowrap text-sm text-muted-foreground" aria-live="polite">
-					<span className="text-foreground">
-						{start}-{end}
-					</span>{" "}
-					of <span className="text-foreground">{total}</span>
-				</p>
-			</motion.div>
-
-			<div>
-				<Pagination>
-					<PaginationContent>
-						<PaginationItem>
-							<motion.div
-								whileHover={{ scale: 1.05 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									size="icon"
-									variant="outline"
-									className="disabled:pointer-events-none disabled:opacity-50"
-									onClick={onFirst}
-									disabled={!canPrev}
-									aria-label="Go to first page"
-								>
-									<ChevronFirst size={16} strokeWidth={2} aria-hidden="true" />
-								</Button>
-							</motion.div>
-						</PaginationItem>
-						<PaginationItem>
-							<motion.div
-								whileHover={{ scale: 1.05 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									size="icon"
-									variant="outline"
-									className="disabled:pointer-events-none disabled:opacity-50"
-									onClick={onPrev}
-									disabled={!canPrev}
-									aria-label="Go to previous page"
-								>
-									<ChevronLeft size={16} strokeWidth={2} aria-hidden="true" />
-								</Button>
-							</motion.div>
-						</PaginationItem>
-						<PaginationItem>
-							<motion.div
-								whileHover={{ scale: 1.05 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									size="icon"
-									variant="outline"
-									className="disabled:pointer-events-none disabled:opacity-50"
-									onClick={onNext}
-									disabled={!canNext}
-									aria-label="Go to next page"
-								>
-									<ChevronRight size={16} strokeWidth={2} aria-hidden="true" />
-								</Button>
-							</motion.div>
-						</PaginationItem>
-						<PaginationItem>
-							<motion.div
-								whileHover={{ scale: 1.05 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									size="icon"
-									variant="outline"
-									className="disabled:pointer-events-none disabled:opacity-50"
-									onClick={onLast}
-									disabled={!canNext}
-									aria-label="Go to last page"
-								>
-									<ChevronLast size={16} strokeWidth={2} aria-hidden="true" />
-								</Button>
-							</motion.div>
-						</PaginationItem>
-					</PaginationContent>
-				</Pagination>
+			{/* Navigation buttons */}
+			<div className="flex items-center gap-1">
+				<Button
+					variant="ghost"
+					size="icon"
+					className="h-8 w-8 disabled:opacity-40"
+					onClick={onFirst}
+					disabled={!canPrev}
+					aria-label="Go to first page"
+				>
+					<ChevronFirst size={18} strokeWidth={2} />
+				</Button>
+				<Button
+					variant="ghost"
+					size="icon"
+					className="h-8 w-8 disabled:opacity-40"
+					onClick={onPrev}
+					disabled={!canPrev}
+					aria-label="Go to previous page"
+				>
+					<ChevronLeft size={18} strokeWidth={2} />
+				</Button>
+				<Button
+					variant="ghost"
+					size="icon"
+					className="h-8 w-8 disabled:opacity-40"
+					onClick={onNext}
+					disabled={!canNext}
+					aria-label="Go to next page"
+				>
+					<ChevronRight size={18} strokeWidth={2} />
+				</Button>
+				<Button
+					variant="ghost"
+					size="icon"
+					className="h-8 w-8 disabled:opacity-40"
+					onClick={onLast}
+					disabled={!canNext}
+					aria-label="Go to last page"
+				>
+					<ChevronLast size={18} strokeWidth={2} />
+				</Button>
 			</div>
-		</div>
+		</motion.div>
 	);
 }
+
+export { PAGE_SIZE };
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
index d9a894e5a..cc6ed3fe8 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@@ -1,11 +1,10 @@
 "use client";
 
-import { FileText, MoreHorizontal, Pencil, Trash2 } from "lucide-react";
+import { MoreHorizontal, Pencil, Trash2 } from "lucide-react";
 import { motion } from "motion/react";
 import { useRouter } from "next/navigation";
 import { useState } from "react";
 import { toast } from "sonner";
-import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
 import {
 	AlertDialog,
 	AlertDialogAction,
@@ -43,7 +42,6 @@ export function RowActions({
 	searchSpaceId: string;
 }) {
 	const [isDeleteOpen, setIsDeleteOpen] = useState(false);
-	const [isMetadataOpen, setIsMetadataOpen] = useState(false);
 	const [isDeleting, setIsDeleting] = useState(false);
 	const router = useRouter();
 
@@ -104,29 +102,6 @@ export function RowActions({
 					</Tooltip>
 				)}
 
-				<Tooltip>
-					<TooltipTrigger asChild>
-						<motion.div
-							whileHover={{ scale: 1.1 }}
-							whileTap={{ scale: 0.95 }}
-							transition={{ type: "spring", stiffness: 400, damping: 17 }}
-						>
-							<Button
-								variant="ghost"
-								size="icon"
-								className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"
-								onClick={() => setIsMetadataOpen(true)}
-							>
-								<FileText className="h-4 w-4" />
-								<span className="sr-only">View Metadata</span>
-							</Button>
-						</motion.div>
-					</TooltipTrigger>
-					<TooltipContent side="top">
-						<p>View Metadata</p>
-					</TooltipContent>
-				</Tooltip>
-
 				{isDeletable && (
 					<Tooltip>
 						<TooltipTrigger asChild>
@@ -170,10 +145,6 @@ export function RowActions({
 								<span>Edit</span>
 							</DropdownMenuItem>
 						)}
-						<DropdownMenuItem onClick={() => setIsMetadataOpen(true)}>
-							<FileText className="mr-2 h-4 w-4" />
-							<span>Metadata</span>
-						</DropdownMenuItem>
 						{isDeletable && (
 							<DropdownMenuItem
 								onClick={() => setIsDeleteOpen(true)}
@@ -187,13 +158,6 @@ export function RowActions({
 				</DropdownMenu>
 			</div>
 
-			<JsonMetadataViewer
-				title={document.title}
-				metadata={document.document_metadata}
-				open={isMetadataOpen}
-				onOpenChange={setIsMetadataOpen}
-			/>
-
 			<AlertDialog open={isDeleteOpen} onOpenChange={setIsDeleteOpen}>
 				<AlertDialogContent>
 					<AlertDialogHeader>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
index 73b68b588..b52054dcd 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
@@ -8,11 +8,12 @@ export type Document = {
 	content: string;
 	created_at: string;
 	search_space_id: number;
+	created_by_id?: string | null;
+	created_by_name?: string | null;
 };
 
 export type ColumnVisibility = {
-	title: boolean;
 	document_type: boolean;
-	content: boolean;
+	created_by: boolean; // toggles the "User" column, which renders doc.created_by_name
 	created_at: boolean;
 };
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
index 52eb3546c..269c2ca2f 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@@ -2,22 +2,19 @@
 
 import { useQuery } from "@tanstack/react-query";
 import { useAtomValue } from "jotai";
-import { RefreshCw, SquarePlus, Upload } from "lucide-react";
 import { motion } from "motion/react";
-import { useParams, useRouter } from "next/navigation";
+import { useParams } from "next/navigation";
 import { useTranslations } from "next-intl";
-import { useCallback, useEffect, useId, useMemo, useState } from "react";
+import { useCallback, useEffect, useMemo, useState } from "react";
 import { toast } from "sonner";
 import { deleteDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms";
 import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms";
-import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
-import { Button } from "@/components/ui/button";
 import type { DocumentTypeEnum } from "@/contracts/types/document.types";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { cacheKeys } from "@/lib/query-client/cache-keys";
 import { DocumentsFilters } from "./components/DocumentsFilters";
 import { DocumentsTableShell, type SortKey } from "./components/DocumentsTableShell";
-import { PaginationControls } from "./components/PaginationControls";
+import { PAGE_SIZE, PaginationControls } from "./components/PaginationControls";
 import type { ColumnVisibility } from "./components/types";
 
 function useDebounced<T>(value: T, delay = 250) {
@@ -31,29 +28,20 @@ function useDebounced<T>(value: T, delay = 250) {
 
 export default function DocumentsTable() {
 	const t = useTranslations("documents");
-	const id = useId();
 	const params = useParams();
-	const router = useRouter();
 	const searchSpaceId = Number(params.search_space_id);
-	const { openDialog: openUploadDialog } = useDocumentUploadDialog();
-
-	const handleNewNote = useCallback(() => {
-		router.push(`/dashboard/${searchSpaceId}/editor/new`);
-	}, [router, searchSpaceId]);
 
 	const [search, setSearch] = useState("");
 	const debouncedSearch = useDebounced(search, 250);
 	const [activeTypes, setActiveTypes] = useState<DocumentTypeEnum[]>([]);
 	const [columnVisibility, setColumnVisibility] = useState<ColumnVisibility>({
-		title: true,
 		document_type: true,
-		content: true,
+		created_by: true,
 		created_at: true,
 	});
 	const [pageIndex, setPageIndex] = useState(0);
-	const [pageSize, setPageSize] = useState(50);
-	const [sortKey, setSortKey] = useState<SortKey>("title");
-	const [sortDesc, setSortDesc] = useState(false);
+	const [sortKey, setSortKey] = useState<SortKey>("created_at");
+	const [sortDesc, setSortDesc] = useState(true);
 	const [selectedIds, setSelectedIds] = useState<Set<number>>(new Set());
 	const { data: rawTypeCounts } = useAtomValue(documentTypeCountsAtom);
 	const { mutateAsync: deleteDocumentMutation } = useAtomValue(deleteDocumentMutationAtom);
@@ -63,10 +51,10 @@ export default function DocumentsTable() {
 		() => ({
 			search_space_id: searchSpaceId,
 			page: pageIndex,
-			page_size: pageSize,
+			page_size: PAGE_SIZE,
 			...(activeTypes.length > 0 && { document_types: activeTypes }),
 		}),
-		[searchSpaceId, pageIndex, pageSize, activeTypes]
+		[searchSpaceId, pageIndex, activeTypes]
 	);
 
 	// Build search query parameters
@@ -74,11 +62,11 @@ export default function DocumentsTable() {
 		() => ({
 			search_space_id: searchSpaceId,
 			page: pageIndex,
-			page_size: pageSize,
+			page_size: PAGE_SIZE,
 			title: debouncedSearch.trim(),
 			...(activeTypes.length > 0 && { document_types: activeTypes }),
 		}),
-		[searchSpaceId, pageIndex, pageSize, activeTypes, debouncedSearch]
+		[searchSpaceId, pageIndex, activeTypes, debouncedSearch]
 	);
 
 	// Use query for fetching documents
@@ -112,17 +100,14 @@ export default function DocumentsTable() {
 		activeTypes.length === 0 || activeTypes.includes("SURFSENSE_DOCS" as DocumentTypeEnum);
 
 	// Use query for fetching SurfSense docs
-	const {
-		data: surfsenseDocsResponse,
-		isLoading: isSurfsenseDocsLoading,
-		refetch: refetchSurfsenseDocs,
-	} = useQuery({
-		queryKey: ["surfsense-docs", debouncedSearch, pageIndex, pageSize],
+	// eslint-disable-next-line @typescript-eslint/no-unused-vars
+	const { data: surfsenseDocsResponse } = useQuery({
+		queryKey: ["surfsense-docs", debouncedSearch, pageIndex, PAGE_SIZE],
 		queryFn: () =>
 			documentsApiService.getSurfsenseDocs({
 				queryParams: {
 					page: pageIndex,
-					page_size: pageSize,
+					page_size: PAGE_SIZE,
 					title: debouncedSearch.trim() || undefined,
 				},
 			}),
@@ -131,7 +116,8 @@ export default function DocumentsTable() {
 	});
 
 	// Transform SurfSense docs to match the Document type
-	const surfsenseDocsAsDocuments: Document[] = useMemo(() => {
+	// eslint-disable-next-line @typescript-eslint/no-unused-vars
+	const surfsenseDocsAsDocuments = useMemo(() => {
 		if (!surfsenseDocsResponse?.items) return [];
 		return surfsenseDocsResponse.items.map((doc) => ({
 			id: doc.id,
@@ -145,6 +131,7 @@ export default function DocumentsTable() {
 	}, [surfsenseDocsResponse]);
 
 	// Merge type counts with SURFSENSE_DOCS count
+	// eslint-disable-next-line @typescript-eslint/no-unused-vars
 	const typeCounts = useMemo(() => {
 		const counts = { ...(rawTypeCounts || {}) };
 		if (surfsenseDocsResponse?.total) {
@@ -165,11 +152,17 @@ export default function DocumentsTable() {
 	// Display results directly
 	const displayDocs = documents;
 	const displayTotal = total;
-	const pageStart = pageIndex * pageSize;
-	const pageEnd = Math.min(pageStart + pageSize, displayTotal);
+	const pageEnd = Math.min((pageIndex + 1) * PAGE_SIZE, displayTotal);
 
 	const onToggleType = (type: DocumentTypeEnum, checked: boolean) => {
-		setActiveTypes((prev) => (checked ? [...prev, type] : prev.filter((t) => t !== type)));
+		setActiveTypes((prev) => {
+			if (checked) {
+				// Only add if not already in the array
+				return prev.includes(type) ? prev : [...prev, type];
+			} else {
+				return prev.filter((t) => t !== type);
+			}
+		});
 		setPageIndex(0);
 	};
 
@@ -238,10 +231,21 @@ export default function DocumentsTable() {
 		}
 	};
 
+	// Setters are invoked directly from the handler rather than from inside a
+	// state updater: React requires updaters to be pure, and Strict Mode runs
+	// them twice, which would flip the sort direction straight back again.
+	const handleSortChange = useCallback(
+		(key: SortKey) => {
+			setSortDesc((v) => (sortKey === key ? !v : false));
+			if (sortKey !== key) setSortKey(key);
+		},
+		[sortKey]
+	);
+
 	useEffect(() => {
 		const mq = window.matchMedia("(max-width: 768px)");
 		const apply = (isSmall: boolean) => {
-			setColumnVisibility((prev) => ({ ...prev, content: !isSmall, created_at: !isSmall }));
+			setColumnVisibility((prev) => ({ ...prev, created_by: !isSmall, created_at: !isSmall }));
 		};
 		apply(mq.matches);
 		const onChange = (e: MediaQueryListEvent) => apply(e.matches);
@@ -254,34 +258,9 @@ export default function DocumentsTable() {
 			initial={{ opacity: 0, y: 20 }}
 			animate={{ opacity: 1, y: 0 }}
 			transition={{ duration: 0.3 }}
-			className="w-full px-6 py-4 space-y-6 min-h-[calc(100vh-64px)]"
+			className="w-full max-w-7xl mx-auto px-6 pt-17 pb-6 space-y-6 min-h-[calc(100vh-64px)]"
 		>
-			<motion.div
-				className="flex items-center justify-between"
-				initial={{ opacity: 0, y: 10 }}
-				animate={{ opacity: 1, y: 0 }}
-				transition={{ delay: 0.1 }}
-			>
-				<div>
-					<h2 className="text-xl md:text-2xl font-bold tracking-tight">{t("title")}</h2>
-					<p className="text-xs md:text-sm text-muted-foreground">{t("subtitle")}</p>
-				</div>
-				<div className="flex items-center gap-2">
-					<Button onClick={openUploadDialog} variant="default" size="sm">
-						<Upload className="w-4 h-4 mr-2" />
-						{t("upload_documents")}
-					</Button>
-					<Button onClick={handleNewNote} variant="outline" size="sm">
-						<SquarePlus className="w-4 h-4 mr-2" />
-						{t("create_shared_note")}
-					</Button>
-					<Button onClick={refreshCurrentView} variant="outline" size="sm" disabled={isRefreshing}>
-						<RefreshCw className={`w-4 h-4 mr-2 ${isRefreshing ? "animate-spin" : ""}`} />
-						{t("refresh")}
-					</Button>
-				</div>
-			</motion.div>
-
+			{/* Filters */}
 			<DocumentsFilters
 				typeCounts={rawTypeCounts ?? {}}
 				selectedIds={selectedIds}
@@ -294,6 +273,7 @@ export default function DocumentsTable() {
 				onToggleColumn={onToggleColumn}
 			/>
 
+			{/* Table */}
 			<DocumentsTableShell
 				documents={displayDocs}
 				loading={!!loading}
@@ -305,30 +285,19 @@ export default function DocumentsTable() {
 				deleteDocument={deleteDocument}
 				sortKey={sortKey}
 				sortDesc={sortDesc}
-				onSortChange={(key) => {
-					if (sortKey === key) setSortDesc((v) => !v);
-					else {
-						setSortKey(key);
-						setSortDesc(false);
-					}
-				}}
+				onSortChange={handleSortChange}
 			/>
 
+			{/* Pagination */}
 			<PaginationControls
 				pageIndex={pageIndex}
-				pageSize={pageSize}
 				total={displayTotal}
-				onPageSizeChange={(s) => {
-					setPageSize(s);
-					setPageIndex(0);
-				}}
 				onFirst={() => setPageIndex(0)}
 				onPrev={() => setPageIndex((i) => Math.max(0, i - 1))}
 				onNext={() => setPageIndex((i) => (pageEnd < displayTotal ? i + 1 : i))}
-				onLast={() => setPageIndex(Math.max(0, Math.ceil(displayTotal / pageSize) - 1))}
+				onLast={() => setPageIndex(Math.max(0, Math.ceil(displayTotal / PAGE_SIZE) - 1))}
 				canPrev={pageIndex > 0}
 				canNext={pageEnd < displayTotal}
-				id={id}
 			/>
 		</motion.div>
 	);
diff --git a/surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts b/surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts
new file mode 100644
index 000000000..38205a8d2
--- /dev/null
+++ b/surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts
@@ -0,0 +1,5 @@
+import { atom } from "jotai";
+
+// Atom to control the connector dialog open state from anywhere in the app
+export const connectorDialogOpenAtom = atom(false);
+
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index 9b201e96b..abb32dde1 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -37,7 +37,7 @@ import { AllConnectorsTab } from "./connector-popup/tabs/all-connectors-tab";
 import { ConnectorAccountsListView } from "./connector-popup/views/connector-accounts-list-view";
 import { YouTubeCrawlerView } from "./connector-popup/views/youtube-crawler-view";
 
-export const ConnectorIndicator: FC = () => {
+export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger = false }) => {
 	const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom);
 	const searchParams = useSearchParams();
 	const { data: currentUser } = useAtomValue(currentUserAtom);
@@ -186,34 +186,36 @@ export const ConnectorIndicator: FC = () => {
 
 	return (
 		<Dialog open={isOpen} onOpenChange={handleOpenChange}>
-			<TooltipIconButton
-				data-joyride="connector-icon"
-				tooltip={hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"}
-				side="bottom"
-				className={cn(
-					"size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative",
-					"hover:bg-muted-foreground/15 dark:hover:bg-muted-foreground/30",
-					"outline-none focus:outline-none focus-visible:outline-none font-semibold text-xs",
-					"border-0 ring-0 focus:ring-0 shadow-none focus:shadow-none"
-				)}
-				aria-label={
-					hasConnectors ? `View ${activeConnectorsCount} connectors` : "Add your first connector"
-				}
-				onClick={() => handleOpenChange(true)}
-			>
-				{isLoading ? (
-					<Spinner size="sm" />
-				) : (
-					<>
-						<Cable className="size-4 stroke-[1.5px]" />
-						{activeConnectorsCount > 0 && (
-							<span className="absolute -top-0.5 right-0 flex items-center justify-center min-w-[16px] h-4 px-1 text-[10px] font-medium rounded-full bg-primary text-primary-foreground shadow-sm">
-								{activeConnectorsCount > 99 ? "99+" : activeConnectorsCount}
-							</span>
-						)}
-					</>
-				)}
-			</TooltipIconButton>
+			{!hideTrigger && (
+				<TooltipIconButton
+					data-joyride="connector-icon"
+					tooltip={hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"}
+					side="bottom"
+					className={cn(
+						"size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative",
+						"hover:bg-muted-foreground/15 dark:hover:bg-muted-foreground/30",
+						"outline-none focus:outline-none focus-visible:outline-none font-semibold text-xs",
+						"border-0 ring-0 focus:ring-0 shadow-none focus:shadow-none"
+					)}
+					aria-label={
+						hasConnectors ? `View ${activeConnectorsCount} connectors` : "Add your first connector"
+					}
+					onClick={() => handleOpenChange(true)}
+				>
+					{isLoading ? (
+						<Spinner size="sm" />
+					) : (
+						<>
+							<Cable className="size-4 stroke-[1.5px]" />
+							{activeConnectorsCount > 0 && (
+								<span className="absolute -top-0.5 right-0 flex items-center justify-center min-w-[16px] h-4 px-1 text-[10px] font-medium rounded-full bg-primary text-primary-foreground shadow-sm">
+									{activeConnectorsCount > 99 ? "99+" : activeConnectorsCount}
+								</span>
+							)}
+						</>
+					)}
+				</TooltipIconButton>
+			)}
 
 			<DialogContent className="max-w-3xl w-[95vw] sm:w-full h-[75vh] sm:h-[85vh] flex flex-col p-0 gap-0 overflow-hidden border border-border bg-muted text-foreground focus:outline-none focus:ring-0 focus-visible:outline-none focus-visible:ring-0 [&>button]:right-4 sm:[&>button]:right-12 [&>button]:top-6 sm:[&>button]:top-10 [&>button]:opacity-80 hover:[&>button]:opacity-100 [&>button_svg]:size-5">
 				<DialogTitle className="sr-only">Manage Connectors</DialogTitle>
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 639d0f7ed..118ca66ce 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -1,8 +1,9 @@
 import { format } from "date-fns";
-import { useAtomValue } from "jotai";
+import { useAtom, useAtomValue } from "jotai";
 import { useRouter, useSearchParams } from "next/navigation";
 import { useCallback, useEffect, useRef, useState } from "react";
 import { toast } from "sonner";
+import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms";
 import {
 	createConnectorMutationAtom,
 	deleteConnectorMutationAtom,
@@ -49,7 +50,8 @@ export const useConnectorDialog = () => {
 	const { mutateAsync: deleteConnector } = useAtomValue(deleteConnectorMutationAtom);
 	const { mutateAsync: createConnector } = useAtomValue(createConnectorMutationAtom);
 
-	const [isOpen, setIsOpen] = useState(false);
+	// Use global atom for dialog open state so it can be controlled from anywhere
+	const [isOpen, setIsOpen] = useAtom(connectorDialogOpenAtom);
 	const [activeTab, setActiveTab] = useState("all");
 	const [connectingId, setConnectingId] = useState<string | null>(null);
 	const [isScrolled, setIsScrolled] = useState(false);

From 878e829bdc8da815a075598082202f0a751306b0 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 17:19:29 +0530
Subject: [PATCH 02/36] feat: enhance document filters and table components
 with search functionality and improved loading states

---
 .../(manage)/components/DocumentTypeIcon.tsx  |  10 +-
 .../(manage)/components/DocumentsFilters.tsx  | 255 ++++++++------
 .../components/DocumentsTableShell.tsx        | 321 +++++++++++++-----
 .../(manage)/components/RowActions.tsx        | 161 ++++-----
 4 files changed, 480 insertions(+), 267 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
index 246cff1c0..b5d434e92 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
@@ -4,8 +4,8 @@ import type React from "react";
 import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 
-export function getDocumentTypeIcon(type: string): React.ReactNode {
-	return getConnectorIcon(type);
+export function getDocumentTypeIcon(type: string, className?: string): React.ReactNode {
+	return getConnectorIcon(type, className);
 }
 
 export function getDocumentTypeLabel(type: string): string {
@@ -18,7 +18,7 @@ export function getDocumentTypeLabel(type: string): string {
 const MAX_LABEL_LENGTH = 28;
 
 export function DocumentTypeChip({ type, className }: { type: string; className?: string }) {
-	const icon = getDocumentTypeIcon(type);
+	const icon = getDocumentTypeIcon(type, "h-4 w-4");
 	const fullLabel = getDocumentTypeLabel(type);
 	const truncatedLabel = fullLabel.length > MAX_LABEL_LENGTH 
 		? `${fullLabel.slice(0, MAX_LABEL_LENGTH)}...` 
@@ -27,9 +27,9 @@ export function DocumentTypeChip({ type, className }: { type: string; className?
 
 	const chip = (
 		<span
-			className={`inline-flex items-center gap-1.5 rounded-md border border-border/50 bg-muted/30 px-2 py-0.5 text-xs font-medium text-muted-foreground ${className ?? ""}`}
+			className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground ${className ?? ""}`}
 		>
-			<span className="opacity-70 flex-shrink-0">{icon}</span>
+			<span className="opacity-80 flex-shrink-0">{icon}</span>
 			<span className="truncate">{truncatedLabel}</span>
 		</span>
 	);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
index 87d349e38..2c3dc7eef 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
@@ -7,12 +7,14 @@ import {
 	Columns3,
 	FilePlus2,
 	FileType,
+	ListFilter,
+	Search,
 	SlidersHorizontal,
 	Trash,
 } from "lucide-react";
 import { motion } from "motion/react";
 import { useTranslations } from "next-intl";
-import React, { useMemo, useRef } from "react";
+import React, { useMemo, useRef, useState } from "react";
 import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms";
 import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
 import {
@@ -64,10 +66,20 @@ export function DocumentsFilters({
 	const { openDialog: openUploadDialog } = useDocumentUploadDialog();
 	const setConnectorDialogOpen = useSetAtom(connectorDialogOpenAtom);
 
+	const [typeSearchQuery, setTypeSearchQuery] = useState("");
+
 	const uniqueTypes = useMemo(() => {
 		return Object.keys(typeCountsRecord).sort() as DocumentTypeEnum[];
 	}, [typeCountsRecord]);
 
+	// Types whose label contains the search text, compared case-insensitively.
+	// The query is trimmed before matching so stray spaces cannot hide matches.
+	const filteredTypes = useMemo(() => {
+		const query = typeSearchQuery.trim().toLowerCase();
+		if (!query) return uniqueTypes;
+		return uniqueTypes.filter((type) => getDocumentTypeLabel(type).toLowerCase().includes(query));
+	}, [uniqueTypes, typeSearchQuery]);
+
 	const typeCounts = useMemo(() => {
 		const map = new Map<string, number>();
 		for (const [type, count] of Object.entries(typeCountsRecord)) {
@@ -117,10 +129,13 @@ export function DocumentsFilters({
 					animate={{ opacity: 1, y: 0 }}
 					transition={{ type: "spring", stiffness: 300, damping: 30 }}
 				>
+					<div className="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3 text-muted-foreground">
+						<ListFilter size={14} aria-hidden="true" />
+					</div>
 					<Input
 						id={`${id}-input`}
 						ref={inputRef}
-						className="peer h-9 w-full pl-3 pr-9 text-sm bg-background border-border/60 focus-visible:ring-1 focus-visible:ring-ring/30"
+						className="peer h-9 w-full pl-9 pr-9 text-sm bg-background border-border/60 focus-visible:ring-1 focus-visible:ring-ring/30"
 						value={searchValue}
 						onChange={(e) => onSearch(e.target.value)}
 						placeholder="Filter by title"
@@ -148,74 +163,94 @@ export function DocumentsFilters({
 
 				{/* Filter Buttons Group */}
 				<div className="flex items-center gap-2 flex-wrap">
-					{/* Type Filter */}
-					<Popover>
-						<PopoverTrigger asChild>
-							<Button
-								variant="outline"
-								size="sm"
-								className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
-							>
-								<FileType size={14} className="text-muted-foreground" />
-								<span className="hidden sm:inline">Type</span>
-								{activeTypes.length > 0 && (
-									<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
-										{activeTypes.length}
-									</span>
-								)}
-							</Button>
-						</PopoverTrigger>
-						<PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
-							<div className="px-2.5 pt-3">
-								<div className="mb-1.5 px-1 text-[11px] font-medium text-muted-foreground">
-									Filter by source
+				{/* Type Filter */}
+				<Popover>
+					<PopoverTrigger asChild>
+						<Button
+							variant="outline"
+							size="sm"
+							className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
+						>
+							<FileType size={14} className="text-muted-foreground" />
+							<span className="hidden sm:inline">Type</span>
+							{activeTypes.length > 0 && (
+								<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
+									{activeTypes.length}
+								</span>
+							)}
+						</Button>
+					</PopoverTrigger>
+					<PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
+						<div>
+							{/* Search input */}
+							<div className="p-2 border-b border-border/50">
+								<div className="relative">
+									<Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
+									<Input
+										placeholder="Search types..."
+										value={typeSearchQuery}
+										onChange={(e) => setTypeSearchQuery(e.target.value)}
+										className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0"
+									/>
 								</div>
-								<div className="space-y-0.5 max-h-[300px] overflow-y-auto overflow-x-hidden">
-									{uniqueTypes.map((value: DocumentTypeEnum, i) => (
+							</div>
+
+							<div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
+								{filteredTypes.length === 0 ? (
+									<div className="py-6 text-center text-sm text-muted-foreground">
+										No types found
+									</div>
+								) : (
+									filteredTypes.map((value: DocumentTypeEnum, i) => (
 										<button
 											key={value}
 											type="button"
-											className="flex w-full items-center gap-2 py-1 px-2.5 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
+											className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
 											onClick={() => onToggleType(value, !activeTypes.includes(value))}
 										>
+											{/* Icon */}
+											<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80">
+												{getDocumentTypeIcon(value, "h-4 w-4")}
+											</div>
+											{/* Text content */}
+											<div className="flex flex-col min-w-0 flex-1 gap-0.5">
+												<span className="text-[13px] font-medium text-foreground truncate leading-tight">
+													{getDocumentTypeLabel(value)}
+												</span>
+												<span className="text-[11px] text-muted-foreground leading-tight">
+													{typeCounts.get(value)} document{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
+												</span>
+											</div>
+											{/* Checkbox */}
 											<Checkbox
 												id={`${id}-${i}`}
 												checked={activeTypes.includes(value)}
 												onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
-												className="h-3.5 w-3.5 flex-shrink-0 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+												className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
 											/>
-											<Label
-												htmlFor={`${id}-${i}`}
-												className="flex flex-1 items-center gap-2 font-normal text-xs cursor-pointer min-w-0"
-											>
-												<span className="opacity-60 flex-shrink-0">{getDocumentTypeIcon(value)}</span>
-												<span className="truncate min-w-0">{getDocumentTypeLabel(value)}</span>
-												<span className="text-[10px] text-muted-foreground/70 tabular-nums flex-shrink-0 ml-auto">
-													{typeCounts.get(value)}
-												</span>
-											</Label>
 										</button>
-									))}
-								</div>
-								{activeTypes.length > 0 && (
-									<div className="mt-1 pt-1 pb-1 border-t border-border/50 pb-1">
-										<Button
-											variant="ghost"
-											size="sm"
-											className="w-full h-6 text-[11px]"
-											onClick={() => {
-												activeTypes.forEach((t) => {
-													onToggleType(t, false);
-												});
-											}}
-										>
-											Clear filters
-										</Button>
-									</div>
+									))
 								)}
 							</div>
-						</PopoverContent>
-					</Popover>
+							{activeTypes.length > 0 && (
+								<div className="px-3 pt-1.5 border-t border-border/50">
+									<Button
+										variant="ghost"
+										size="sm"
+										className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
+										onClick={() => {
+											activeTypes.forEach((t) => {
+												onToggleType(t, false);
+											});
+										}}
+									>
+										Clear filters
+									</Button>
+								</div>
+							)}
+						</div>
+					</PopoverContent>
+				</Popover>
 
 					{/* View/Columns Popover */}
 					<Popover>
@@ -266,57 +301,69 @@ export function DocumentsFilters({
 							</div>
 						</PopoverContent>
 					</Popover>
-				</div>
 
-				{/* Bulk Delete Button */}
-				{selectedIds.size > 0 && (
-					<AlertDialog>
-						<AlertDialogTrigger asChild>
-							<motion.div
-								initial={{ opacity: 0, scale: 0.9 }}
-								animate={{ opacity: 1, scale: 1 }}
-								exit={{ opacity: 0, scale: 0.9 }}
-							>
-								<Button
-									variant="destructive"
-									size="sm"
-									className="h-9 gap-2"
+					{/* Bulk Delete Button - positioned next to View on mobile */}
+					{selectedIds.size > 0 && (
+						<AlertDialog>
+							<AlertDialogTrigger asChild>
+								<motion.div
+									initial={{ opacity: 0, scale: 0.9 }}
+									animate={{ opacity: 1, scale: 1 }}
+									exit={{ opacity: 0, scale: 0.9 }}
 								>
-									<Trash size={14} />
-									Delete
-									<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
-										{selectedIds.size}
-									</span>
-								</Button>
-							</motion.div>
-						</AlertDialogTrigger>
-						<AlertDialogContent className="max-w-md">
-							<div className="flex flex-col gap-2 sm:flex-row sm:gap-4">
-								<div
-									className="flex size-10 shrink-0 items-center justify-center rounded-full bg-destructive/10 text-destructive"
-									aria-hidden="true"
-								>
-									<CircleAlert size={18} strokeWidth={2} />
+									{/* Mobile: icon with count */}
+									<Button
+										variant="destructive"
+										size="sm"
+										className="h-9 gap-1.5 px-2.5 md:hidden"
+									>
+										<Trash size={14} />
+										<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
+											{selectedIds.size}
+										</span>
+									</Button>
+									{/* Desktop: full button */}
+									<Button
+										variant="destructive"
+										size="sm"
+										className="h-9 gap-2 hidden md:flex"
+									>
+										<Trash size={14} />
+										Delete
+										<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
+											{selectedIds.size}
+										</span>
+									</Button>
+								</motion.div>
+							</AlertDialogTrigger>
+							<AlertDialogContent className="max-w-md">
+								<div className="flex flex-col gap-2 sm:flex-row sm:gap-4">
+									<div
+										className="flex size-10 shrink-0 items-center justify-center rounded-full bg-destructive/10 text-destructive"
+										aria-hidden="true"
+									>
+										<CircleAlert size={18} strokeWidth={2} />
+									</div>
+									<AlertDialogHeader className="flex-1">
+										<AlertDialogTitle>Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?</AlertDialogTitle>
+										<AlertDialogDescription>
+											This action cannot be undone. This will permanently delete the selected {selectedIds.size === 1 ? "document" : "documents"} from your search space.
+										</AlertDialogDescription>
+									</AlertDialogHeader>
 								</div>
-								<AlertDialogHeader className="flex-1">
-									<AlertDialogTitle>Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?</AlertDialogTitle>
-									<AlertDialogDescription>
-										This action cannot be undone. This will permanently delete the selected {selectedIds.size === 1 ? "document" : "documents"} from your search space.
-									</AlertDialogDescription>
-								</AlertDialogHeader>
-							</div>
-							<AlertDialogFooter>
-								<AlertDialogCancel>Cancel</AlertDialogCancel>
-								<AlertDialogAction
-									onClick={onBulkDelete}
-									className="bg-destructive text-destructive-foreground hover:bg-destructive/90"
-								>
-									Delete
-								</AlertDialogAction>
-							</AlertDialogFooter>
-						</AlertDialogContent>
-					</AlertDialog>
-				)}
+								<AlertDialogFooter>
+									<AlertDialogCancel>Cancel</AlertDialogCancel>
+									<AlertDialogAction
+										onClick={onBulkDelete}
+										className="bg-destructive text-destructive-foreground hover:bg-destructive/90"
+									>
+										Delete
+									</AlertDialogAction>
+								</AlertDialogFooter>
+							</AlertDialogContent>
+						</AlertDialog>
+					)}
+				</div>
 			</div>
 		</motion.div>
 	);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index faa7605a3..f23893fbe 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -1,16 +1,17 @@
 "use client";
 
-import { ChevronDown, ChevronUp, FileX, Plus } from "lucide-react";
+import { formatDistanceToNow } from "date-fns";
+import { Calendar, ChevronDown, ChevronUp, FileText, FileX, Link2, Plus, User } from "lucide-react";
 import { motion } from "motion/react";
 import { useParams } from "next/navigation";
 import { useTranslations } from "next-intl";
-import React, { useState } from "react";
+import React, { useRef, useState, useEffect } from "react";
 import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
 import { DocumentViewer } from "@/components/document-viewer";
 import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
-import { Spinner } from "@/components/ui/spinner";
+import { Skeleton } from "@/components/ui/skeleton";
 import {
 	Table,
 	TableBody,
@@ -37,35 +38,82 @@ function sortDocuments(docs: Document[], key: SortKey, desc: boolean): Document[
 	return desc ? sorted.reverse() : sorted;
 }
 
-function formatDate(dateStr: string): string {
+function formatRelativeDate(dateStr: string): string {
+	return formatDistanceToNow(new Date(dateStr), { addSuffix: true });
+}
+
+function formatAbsoluteDate(dateStr: string): string {
 	const date = new Date(dateStr);
-	return date.toLocaleDateString("en-US", {
+	return date.toLocaleString("en-US", {
 		year: "numeric",
 		month: "long",
 		day: "numeric",
+		hour: "2-digit",
+		minute: "2-digit",
+		hour12: false,
 	});
 }
 
+/**
+ * Renders `text` in a span and, only while that span overflows horizontally,
+ * wraps it in a tooltip that shows the full text.
+ */
+function TruncatedText({ text, className }: { text: string; className?: string }) {
+	const textRef = useRef<HTMLSpanElement>(null);
+	const [isTruncated, setIsTruncated] = useState(false);
+
+	// No dependency array on purpose: re-measure after every render so a changed
+	// `text` or a column whose width changed without a window resize cannot leave
+	// a stale flag. Setting the same boolean again does not trigger a re-render.
+	useEffect(() => {
+		const checkTruncation = () => {
+			const el = textRef.current;
+			if (el) setIsTruncated(el.scrollWidth > el.clientWidth);
+		};
+		checkTruncation();
+		window.addEventListener("resize", checkTruncation);
+		return () => window.removeEventListener("resize", checkTruncation);
+	});
+
+	const label = (
+		<span ref={textRef} className={className}>
+			{text}
+		</span>
+	);
+	if (!isTruncated) return label;
+	return (
+		<Tooltip>
+			<TooltipTrigger asChild>{label}</TooltipTrigger>
+			<TooltipContent side="top" className="max-w-xs">
+				<p className="break-words">{text}</p>
+			</TooltipContent>
+		</Tooltip>
+	);
+}
+
 function SortableHeader({
 	children,
 	sortKey,
 	currentSortKey,
 	sortDesc,
 	onSort,
+	icon,
 }: {
 	children: React.ReactNode;
 	sortKey: SortKey;
 	currentSortKey: SortKey;
 	sortDesc: boolean;
 	onSort: (key: SortKey) => void;
+	icon?: React.ReactNode;
 }) {
 	const isActive = currentSortKey === sortKey;
 	return (
 		<button
 			type="button"
 			onClick={() => onSort(sortKey)}
-			className="flex items-center gap-1.5 text-left font-medium text-muted-foreground hover:text-foreground transition-colors group"
+			className="flex items-center gap-1.5 text-left text-sm font-medium text-muted-foreground/70 hover:text-muted-foreground transition-colors group"
 		>
+			{icon && <span className="opacity-60">{icon}</span>}
 			{children}
 			<span className={`transition-opacity ${isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50"}`}>
 				{isActive && sortDesc ? (
@@ -143,18 +191,119 @@ export function DocumentsTableShell({
 
 	return (
 		<motion.div
-			className="rounded-xl border border-border/50 bg-card/30 backdrop-blur-sm overflow-hidden shadow-sm"
+			className="rounded-lg border border-border/30 bg-background overflow-hidden"
 			initial={{ opacity: 0, y: 20 }}
 			animate={{ opacity: 1, y: 0 }}
 			transition={{ type: "spring", stiffness: 300, damping: 30, delay: 0.2 }}
 		>
 			{loading ? (
-				<div className="flex h-[400px] w-full items-center justify-center">
-					<div className="flex flex-col items-center gap-3">
-						<Spinner size="lg" className="text-primary" />
-						<p className="text-sm text-muted-foreground">{t("loading")}</p>
+				<>
+					{/* Desktop Skeleton View */}
+					<div className="hidden md:flex md:flex-col">
+						<Table className="table-fixed w-full">
+							<TableHeader>
+								<TableRow className="hover:bg-transparent border-b border-border/30">
+									<TableHead className="w-8 px-0 text-center border-r border-border/30">
+										<div className="flex items-center justify-center h-full">
+											<Skeleton className="h-4 w-4 rounded" />
+										</div>
+									</TableHead>
+									<TableHead className="w-[35%] max-w-0 border-r border-border/30">
+										<Skeleton className="h-3 w-20" />
+									</TableHead>
+									{columnVisibility.document_type && (
+										<TableHead className="w-44 border-r border-border/30">
+											<Skeleton className="h-3 w-14" />
+										</TableHead>
+									)}
+									{columnVisibility.created_by && (
+										<TableHead className="w-36 border-r border-border/30">
+											<Skeleton className="h-3 w-10" />
+										</TableHead>
+									)}
+									{columnVisibility.created_at && (
+										<TableHead className="w-32 border-r border-border/30">
+											<Skeleton className="h-3 w-16" />
+										</TableHead>
+									)}
+									<TableHead className="w-10 text-center">
+										<span className="sr-only">Actions</span>
+									</TableHead>
+								</TableRow>
+							</TableHeader>
+						</Table>
+						<div className="h-[50vh] overflow-auto">
+							<Table className="table-fixed w-full">
+								<TableBody>
+									{[65, 80, 45, 72, 55, 88, 40, 60, 50, 75].map((widthPercent, index) => (
+										<TableRow
+											key={`skeleton-${index}`}
+											className="border-b border-border/30 hover:bg-transparent"
+										>
+											<TableCell className="w-8 px-0 py-2.5 text-center border-r border-border/30">
+												<div className="flex items-center justify-center h-full">
+													<Skeleton className="h-4 w-4 rounded" />
+												</div>
+											</TableCell>
+											<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/30">
+												<Skeleton
+													className="h-4"
+													style={{ width: `${widthPercent}%` }}
+												/>
+											</TableCell>
+											{columnVisibility.document_type && (
+												<TableCell className="w-44 py-2.5 border-r border-border/30">
+													<Skeleton className="h-5 w-24 rounded" />
+												</TableCell>
+											)}
+											{columnVisibility.created_by && (
+												<TableCell className="w-36 py-2.5 truncate border-r border-border/30">
+													<Skeleton className="h-4 w-20" />
+												</TableCell>
+											)}
+											{columnVisibility.created_at && (
+												<TableCell className="w-32 py-2.5 border-r border-border/30">
+													<Skeleton className="h-4 w-20" />
+												</TableCell>
+											)}
+											<TableCell className="w-10 py-2.5 px-0">
+												<div className="flex justify-center">
+													<Skeleton className="h-7 w-7 rounded" />
+												</div>
+											</TableCell>
+										</TableRow>
+									))}
+								</TableBody>
+							</Table>
+						</div>
 					</div>
-				</div>
+					{/* Mobile Skeleton View */}
+					<div className="md:hidden divide-y divide-border/30 h-[50vh] overflow-auto">
+						{[70, 85, 55, 78, 62, 90].map((widthPercent, index) => (
+							<div key={`skeleton-mobile-${index}`} className="px-4 py-3">
+								<div className="flex items-start gap-3">
+									<Skeleton className="h-4 w-4 mt-0.5 rounded" />
+									<div className="flex-1 min-w-0 space-y-2">
+										<Skeleton
+											className="h-4"
+											style={{ width: `${widthPercent}%` }}
+										/>
+										<div className="flex flex-wrap items-center gap-2">
+											<Skeleton className="h-5 w-20 rounded" />
+											{columnVisibility.created_by && (
+												<Skeleton className="h-3 w-14" />
+											)}
+											{columnVisibility.created_at && (
+												<Skeleton className="h-3 w-20" />
+											)}
+										</div>
+									</div>
+									<Skeleton className="h-7 w-7 rounded" />
+								</div>
+							</div>
+						))}
+					</div>
+				</>
 			) : error ? (
 				<div className="flex h-[400px] w-full items-center justify-center">
 					<div className="flex flex-col items-center gap-3">
@@ -189,72 +338,79 @@ export function DocumentsTableShell({
 				</div>
 			) : (
 				<>
-					{/* Desktop Table View */}
+					{/* Desktop Table View - Notion Style */}
 					<div className="hidden md:flex md:flex-col">
 						{/* Fixed Header */}
-						<Table>
+						<Table className="table-fixed w-full">
 							<TableHeader>
-								<TableRow className="bg-muted/30 hover:bg-muted/30 border-b border-border/50">
-									<TableHead className="w-[40px] pl-4">
-										<Checkbox
-											checked={allSelectedOnPage || (someSelectedOnPage && "indeterminate")}
-											onCheckedChange={(v) => toggleAll(!!v)}
-											aria-label="Select all"
-											className="data-[state=checked]:bg-primary data-[state=checked]:border-primary"
-										/>
+								<TableRow className="hover:bg-transparent border-b border-border/30">
+									<TableHead className="w-8 px-0 text-center border-r border-border/30">
+										<div className="flex items-center justify-center h-full">
+											<Checkbox
+												checked={allSelectedOnPage || (someSelectedOnPage && "indeterminate")}
+												onCheckedChange={(v) => toggleAll(!!v)}
+												aria-label="Select all"
+												className="border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+											/>
+										</div>
 									</TableHead>
-									<TableHead className="min-w-[200px]">
+									<TableHead className="w-[35%] border-r border-border/30">
 										<SortableHeader
 											sortKey="title"
 											currentSortKey={sortKey}
 											sortDesc={sortDesc}
 											onSort={onSortHeader}
+											icon={<FileText size={14} className="text-muted-foreground" />}
 										>
 											Document
 										</SortableHeader>
 									</TableHead>
 									{columnVisibility.document_type && (
-										<TableHead className="w-[160px]">
+										<TableHead className="w-44 border-r border-border/30">
 											<SortableHeader
 												sortKey="document_type"
 												currentSortKey={sortKey}
 												sortDesc={sortDesc}
 												onSort={onSortHeader}
+												icon={<Link2 size={14} className="text-muted-foreground" />}
 											>
 												Source
 											</SortableHeader>
 										</TableHead>
 									)}
 									{columnVisibility.created_by && (
-										<TableHead className="w-[150px]">
-											<span className="text-muted-foreground font-medium">User</span>
+										<TableHead className="w-36 border-r border-border/30">
+											<span className="flex items-center gap-1.5 text-sm font-medium text-muted-foreground/70">
+												<User size={14} className="opacity-60 text-muted-foreground" />
+												User
+											</span>
 										</TableHead>
 									)}
 									{columnVisibility.created_at && (
-										<TableHead className="w-[150px]">
+										<TableHead className="w-32 border-r border-border/30">
 											<SortableHeader
 												sortKey="created_at"
 												currentSortKey={sortKey}
 												sortDesc={sortDesc}
 												onSort={onSortHeader}
+												icon={<Calendar size={14} className="text-muted-foreground" />}
 											>
 												Created
 											</SortableHeader>
 										</TableHead>
 									)}
-									<TableHead className="w-[80px] pr-4">
+									<TableHead className="w-10 text-center">
 										<span className="sr-only">Actions</span>
 									</TableHead>
 								</TableRow>
 							</TableHeader>
 						</Table>
 						{/* Scrollable Body */}
-						<div className="max-h-[55vh] overflow-auto">
-							<Table>
+						<div className="h-[50vh] overflow-auto">
+							<Table className="table-fixed w-full">
 								<TableBody>
 									{sorted.map((doc, index) => {
 										const title = doc.title;
-										const truncatedTitle = title.length > 50 ? `${title.slice(0, 50)}...` : title;
 										const isSelected = selectedIds.has(doc.id);
 										return (
 											<motion.tr
@@ -269,26 +425,28 @@ export function DocumentsTableShell({
 												}}
 												className={`border-b border-border/30 transition-colors ${
 													isSelected
-														? "bg-primary/5 hover:bg-primary/10"
-														: "hover:bg-muted/40"
+														? "bg-primary/5 hover:bg-primary/8"
+														: "hover:bg-muted/30"
 												}`}
 											>
-												<TableCell className="w-[40px] pl-4 py-3">
-													<Checkbox
-														checked={isSelected}
-														onCheckedChange={(v) => toggleOne(doc.id, !!v)}
-														aria-label="Select row"
-														className="data-[state=checked]:bg-primary data-[state=checked]:border-primary"
-													/>
+												<TableCell className="w-8 px-0 py-2.5 text-center border-r border-border/30">
+													<div className="flex items-center justify-center h-full">
+														<Checkbox
+															checked={isSelected}
+															onCheckedChange={(v) => toggleOne(doc.id, !!v)}
+															aria-label="Select row"
+															className="border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+														/>
+													</div>
 												</TableCell>
-												<TableCell className="min-w-[200px] py-3">
+												<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/30">
 													<DocumentViewer
 														title={doc.title}
 														content={doc.content}
 														trigger={
 															<button
 																type="button"
-																className="text-left font-medium text-foreground/90 hover:text-primary transition-colors cursor-pointer bg-transparent border-0 p-0"
+																className="block w-full text-left text-sm text-foreground hover:text-foreground transition-colors cursor-pointer bg-transparent border-0 p-0 truncate"
 																onClick={(e) => {
 																	// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
 																	if (e.ctrlKey || e.metaKey) {
@@ -305,46 +463,44 @@ export function DocumentsTableShell({
 																	}
 																}}
 															>
-																{title.length > 50 ? (
-																	<Tooltip>
-																		<TooltipTrigger asChild>
-																			<span>{truncatedTitle}</span>
-																		</TooltipTrigger>
-																		<TooltipContent side="top" className="max-w-xs">
-																			<p className="break-words">{title}</p>
-																		</TooltipContent>
-																	</Tooltip>
-																) : (
-																	title
-																)}
+																<TruncatedText text={title} className="truncate block" />
 															</button>
 														}
 													/>
 												</TableCell>
 												{columnVisibility.document_type && (
-													<TableCell className="w-[160px] py-3">
+													<TableCell className="w-44 py-2.5 border-r border-border/30">
 														<DocumentTypeChip type={doc.document_type} />
 													</TableCell>
 												)}
 												{columnVisibility.created_by && (
-													<TableCell className="w-[150px] py-3 text-sm text-muted-foreground truncate">
+													<TableCell className="w-36 py-2.5 text-sm text-foreground truncate border-r border-border/30">
 														{doc.created_by_name || "—"}
 													</TableCell>
 												)}
 												{columnVisibility.created_at && (
-													<TableCell className="w-[150px] py-3 text-sm text-muted-foreground">
-														{formatDate(doc.created_at)}
+													<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/30">
+														<Tooltip>
+															<TooltipTrigger asChild>
+																<span className="cursor-default">{formatRelativeDate(doc.created_at)}</span>
+															</TooltipTrigger>
+															<TooltipContent side="top">
+																{formatAbsoluteDate(doc.created_at)}
+															</TooltipContent>
+														</Tooltip>
 													</TableCell>
 												)}
-												<TableCell className="w-[80px] pr-4 py-3">
-													<RowActions
-														document={doc}
-														deleteDocument={deleteDocument}
-														refreshDocuments={async () => {
-															await onRefresh();
-														}}
-														searchSpaceId={searchSpaceId as string}
-													/>
+												<TableCell className="w-10 py-2.5 px-0">
+													<div className="flex justify-center">
+														<RowActions
+															document={doc}
+															deleteDocument={deleteDocument}
+															refreshDocuments={async () => {
+																await onRefresh();
+															}}
+															searchSpaceId={searchSpaceId as string}
+														/>
+													</div>
 												</TableCell>
 											</motion.tr>
 										);
@@ -354,8 +510,8 @@ export function DocumentsTableShell({
 						</div>
 					</div>
 
-					{/* Mobile Card View */}
-					<div className="md:hidden divide-y divide-border/30">
+					{/* Mobile Card View - Notion Style */}
+					<div className="md:hidden divide-y divide-border/30 h-[50vh] overflow-auto">
 						{sorted.map((doc, index) => {
 							const isSelected = selectedIds.has(doc.id);
 							return (
@@ -363,25 +519,25 @@ export function DocumentsTableShell({
 									key={doc.id}
 									initial={{ opacity: 0 }}
 									animate={{ opacity: 1, transition: { delay: index * 0.03 } }}
-									className={`p-4 transition-colors ${
-										isSelected ? "bg-primary/5" : "hover:bg-muted/30"
+									className={`px-4 py-3 transition-colors ${
+										isSelected ? "bg-primary/5" : "hover:bg-muted/20"
 									}`}
 								>
-									<div className="flex items-start gap-3">
+									<div className="flex items-center gap-3">
 										<Checkbox
 											checked={isSelected}
 											onCheckedChange={(v) => toggleOne(doc.id, !!v)}
 											aria-label="Select row"
-											className="mt-0.5 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+											className="border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary"
 										/>
-										<div className="flex-1 min-w-0 space-y-2">
+										<div className="flex-1 min-w-0 space-y-1.5">
 											<DocumentViewer
 												title={doc.title}
 												content={doc.content}
 												trigger={
 													<button
 														type="button"
-														className="text-left font-medium text-sm text-foreground/90 hover:text-primary transition-colors cursor-pointer truncate block w-full bg-transparent border-0 p-0"
+														className="text-left text-sm text-foreground hover:text-foreground transition-colors cursor-pointer truncate block w-full bg-transparent border-0 p-0"
 														onClick={(e) => {
 															// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
 															if (e.ctrlKey || e.metaKey) {
@@ -405,14 +561,21 @@ export function DocumentsTableShell({
 											<div className="flex flex-wrap items-center gap-2">
 												<DocumentTypeChip type={doc.document_type} />
 												{columnVisibility.created_by && doc.created_by_name && (
-													<span className="text-xs text-muted-foreground">
+													<span className="text-xs text-foreground">
 														{doc.created_by_name}
 													</span>
 												)}
 												{columnVisibility.created_at && (
-													<span className="text-xs text-muted-foreground">
-														{formatDate(doc.created_at)}
-													</span>
+													<Tooltip>
+														<TooltipTrigger asChild>
+															<span className="text-xs text-foreground cursor-default">
+																{formatRelativeDate(doc.created_at)}
+															</span>
+														</TooltipTrigger>
+														<TooltipContent side="top">
+															{formatAbsoluteDate(doc.created_at)}
+														</TooltipContent>
+													</Tooltip>
 												)}
 											</div>
 										</div>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
index cc6ed3fe8..3fd4dcac8 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@@ -1,7 +1,6 @@
 "use client";
 
 import { MoreHorizontal, Pencil, Trash2 } from "lucide-react";
-import { motion } from "motion/react";
 import { useRouter } from "next/navigation";
 import { useState } from "react";
 import { toast } from "sonner";
@@ -21,7 +20,6 @@ import {
 	DropdownMenuItem,
 	DropdownMenuTrigger,
 } from "@/components/ui/dropdown-menu";
-import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import type { Document } from "./types";
 
 // Only FILE and NOTE document types can be edited
@@ -74,88 +72,93 @@ export function RowActions({
 	};
 
 	return (
-		<div className="flex items-center justify-end gap-1">
+		<>
 			{/* Desktop Actions */}
-			<div className="hidden md:flex items-center gap-1">
-				{isEditable && (
-					<Tooltip>
-						<TooltipTrigger asChild>
-							<motion.div
-								whileHover={{ scale: 1.1 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									variant="ghost"
-									size="icon"
-									className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"
-									onClick={handleEdit}
-								>
-									<Pencil className="h-4 w-4" />
-									<span className="sr-only">Edit Document</span>
-								</Button>
-							</motion.div>
-						</TooltipTrigger>
-						<TooltipContent side="top">
-							<p>Edit Document</p>
-						</TooltipContent>
-					</Tooltip>
-				)}
-
-				{isDeletable && (
-					<Tooltip>
-						<TooltipTrigger asChild>
-							<motion.div
-								whileHover={{ scale: 1.1 }}
-								whileTap={{ scale: 0.95 }}
-								transition={{ type: "spring", stiffness: 400, damping: 17 }}
-							>
-								<Button
-									variant="ghost"
-									size="icon"
-									className="h-8 w-8 text-muted-foreground hover:text-destructive hover:bg-destructive/10"
-									onClick={() => setIsDeleteOpen(true)}
-									disabled={isDeleting}
-								>
-									<Trash2 className="h-4 w-4" />
-									<span className="sr-only">Delete</span>
-								</Button>
-							</motion.div>
-						</TooltipTrigger>
-						<TooltipContent side="top">
-							<p>Delete</p>
-						</TooltipContent>
-					</Tooltip>
-				)}
-			</div>
-
-			{/* Mobile Actions Dropdown */}
-			<div className="flex md:hidden">
-				<DropdownMenu>
-					<DropdownMenuTrigger asChild>
-						<Button variant="ghost" size="icon" className="h-8 w-8 text-muted-foreground">
-							<MoreHorizontal className="h-4 w-4" />
-							<span className="sr-only">Open menu</span>
-						</Button>
-					</DropdownMenuTrigger>
-					<DropdownMenuContent align="end" className="w-40">
-						{isEditable && (
+			<div className="hidden md:inline-flex items-center justify-center">
+				{isEditable ? (
+					// Editable documents: show 3-dot dropdown with edit + delete
+					<DropdownMenu>
+						<DropdownMenuTrigger asChild>
+							<Button variant="ghost" size="icon" className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80">
+								<MoreHorizontal className="h-4 w-4" />
+								<span className="sr-only">Open menu</span>
+							</Button>
+						</DropdownMenuTrigger>
+						<DropdownMenuContent align="end" className="w-40">
 							<DropdownMenuItem onClick={handleEdit}>
 								<Pencil className="mr-2 h-4 w-4" />
 								<span>Edit</span>
 							</DropdownMenuItem>
-						)}
-						{isDeletable && (
-							<DropdownMenuItem
-								onClick={() => setIsDeleteOpen(true)}
-								className="text-destructive focus:text-destructive"
-							>
-								<Trash2 className="mr-2 h-4 w-4" />
-								<span>Delete</span>
+							{isDeletable && (
+								<DropdownMenuItem
+									onClick={() => setIsDeleteOpen(true)}
+									className="text-destructive focus:text-destructive"
+								>
+									<Trash2 className="mr-2 h-4 w-4" />
+									<span>Delete</span>
+								</DropdownMenuItem>
+							)}
+						</DropdownMenuContent>
+					</DropdownMenu>
+				) : (
+					// Non-editable documents: show only delete button directly
+					isDeletable && (
+						<Button
+							variant="ghost"
+							size="icon"
+							className="h-8 w-8 text-muted-foreground hover:text-destructive hover:bg-destructive/10"
+							onClick={() => setIsDeleteOpen(true)}
+							disabled={isDeleting}
+						>
+							<Trash2 className="h-4 w-4" />
+							<span className="sr-only">Delete</span>
+						</Button>
+					)
+				)}
+			</div>
+
+			{/* Mobile Actions Dropdown */}
+			<div className="inline-flex md:hidden items-center justify-center">
+				{isEditable ? (
+					// Editable documents: show 3-dot dropdown
+					<DropdownMenu>
+						<DropdownMenuTrigger asChild>
+							<Button variant="ghost" size="icon" className="h-8 w-8 text-muted-foreground">
+								<MoreHorizontal className="h-4 w-4" />
+								<span className="sr-only">Open menu</span>
+							</Button>
+						</DropdownMenuTrigger>
+						<DropdownMenuContent align="end" className="w-40">
+							<DropdownMenuItem onClick={handleEdit}>
+								<Pencil className="mr-2 h-4 w-4" />
+								<span>Edit</span>
 							</DropdownMenuItem>
-						)}
-					</DropdownMenuContent>
-				</DropdownMenu>
+							{isDeletable && (
+								<DropdownMenuItem
+									onClick={() => setIsDeleteOpen(true)}
+									className="text-destructive focus:text-destructive"
+								>
+									<Trash2 className="mr-2 h-4 w-4" />
+									<span>Delete</span>
+								</DropdownMenuItem>
+							)}
+						</DropdownMenuContent>
+					</DropdownMenu>
+				) : (
+					// Non-editable documents: show only delete button directly
+					isDeletable && (
+						<Button
+							variant="ghost"
+							size="icon"
+							className="h-8 w-8 text-muted-foreground hover:text-destructive hover:bg-destructive/10"
+							onClick={() => setIsDeleteOpen(true)}
+							disabled={isDeleting}
+						>
+							<Trash2 className="h-4 w-4" />
+							<span className="sr-only">Delete</span>
+						</Button>
+					)
+				)}
 			</div>
 
 			<AlertDialog open={isDeleteOpen} onOpenChange={setIsDeleteOpen}>
@@ -178,6 +181,6 @@ export function RowActions({
 					</AlertDialogFooter>
 				</AlertDialogContent>
 			</AlertDialog>
-		</div>
+		</>
 	);
 }

From ab2896ec657ae271ec901d17cd897adc9ae14fd4 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 17:27:40 +0530
Subject: [PATCH 03/36] refactor: remove redundant document deletion logic and
 optimize cache updates

---
 .../components/DocumentsTableShell.tsx        | 45 ++-----------------
 .../(manage)/components/RowActions.tsx        |  8 ++--
 .../documents/(manage)/page.tsx               | 18 +-------
 .../documents/document-mutation.atoms.ts      |  2 +-
 4 files changed, 9 insertions(+), 64 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index f23893fbe..3c9ef2cbc 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -3,7 +3,6 @@
 import { formatDistanceToNow } from "date-fns";
 import { Calendar, ChevronDown, ChevronUp, FileText, FileX, Link2, Plus, User } from "lucide-react";
 import { motion } from "motion/react";
-import { useParams } from "next/navigation";
 import { useTranslations } from "next-intl";
 import React, { useRef, useState, useEffect } from "react";
 import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
@@ -22,7 +21,6 @@ import {
 } from "@/components/ui/table";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import { DocumentTypeChip } from "./DocumentTypeIcon";
-import { RowActions } from "./RowActions";
 import type { ColumnVisibility, Document } from "./types";
 
 export type SortKey = keyof Pick<Document, "title" | "document_type" | "created_at">;
@@ -134,7 +132,6 @@ export function DocumentsTableShell({
 	selectedIds,
 	setSelectedIds,
 	columnVisibility,
-	deleteDocument,
 	sortKey,
 	sortDesc,
 	onSortChange,
@@ -146,14 +143,11 @@ export function DocumentsTableShell({
 	selectedIds: Set<number>;
 	setSelectedIds: (update: Set<number>) => void;
 	columnVisibility: ColumnVisibility;
-	deleteDocument: (id: number) => Promise<boolean>;
 	sortKey: SortKey;
 	sortDesc: boolean;
 	onSortChange: (key: SortKey) => void;
 }) {
 	const t = useTranslations("documents");
-	const params = useParams();
-	const searchSpaceId = params.search_space_id;
 	const { openDialog } = useDocumentUploadDialog();
 
 	// State for metadata viewer (opened via Ctrl/Cmd+Click)
@@ -222,13 +216,10 @@ export function DocumentsTableShell({
 										</TableHead>
 									)}
 									{columnVisibility.created_at && (
-										<TableHead className="w-32 border-r border-border/30">
+										<TableHead className="w-32">
 											<Skeleton className="h-3 w-16" />
 										</TableHead>
 									)}
-									<TableHead className="w-10 text-center">
-										<span className="sr-only">Actions</span>
-									</TableHead>
 								</TableRow>
 							</TableHeader>
 						</Table>
@@ -262,15 +253,10 @@ export function DocumentsTableShell({
 												</TableCell>
 											)}
 											{columnVisibility.created_at && (
-												<TableCell className="w-32 py-2.5 border-r border-border/30">
+												<TableCell className="w-32 py-2.5">
 													<Skeleton className="h-4 w-20" />
 												</TableCell>
 											)}
-											<TableCell className="w-10 py-2.5 px-0">
-												<div className="flex justify-center">
-													<Skeleton className="h-7 w-7 rounded" />
-												</div>
-											</TableCell>
 										</TableRow>
 									))}
 								</TableBody>
@@ -387,7 +373,7 @@ export function DocumentsTableShell({
 										</TableHead>
 									)}
 									{columnVisibility.created_at && (
-										<TableHead className="w-32 border-r border-border/30">
+										<TableHead className="w-32">
 											<SortableHeader
 												sortKey="created_at"
 												currentSortKey={sortKey}
@@ -399,9 +385,6 @@ export function DocumentsTableShell({
 											</SortableHeader>
 										</TableHead>
 									)}
-									<TableHead className="w-10 text-center">
-										<span className="sr-only">Actions</span>
-									</TableHead>
 								</TableRow>
 							</TableHeader>
 						</Table>
@@ -479,7 +462,7 @@ export function DocumentsTableShell({
 													</TableCell>
 												)}
 												{columnVisibility.created_at && (
-													<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/30">
+													<TableCell className="w-32 py-2.5 text-sm text-foreground">
 														<Tooltip>
 															<TooltipTrigger asChild>
 																<span className="cursor-default">{formatRelativeDate(doc.created_at)}</span>
@@ -490,18 +473,6 @@ export function DocumentsTableShell({
 														</Tooltip>
 													</TableCell>
 												)}
-												<TableCell className="w-10 py-2.5 px-0">
-													<div className="flex justify-center">
-														<RowActions
-															document={doc}
-															deleteDocument={deleteDocument}
-															refreshDocuments={async () => {
-																await onRefresh();
-															}}
-															searchSpaceId={searchSpaceId as string}
-														/>
-													</div>
-												</TableCell>
 											</motion.tr>
 										);
 									})}
@@ -579,14 +550,6 @@ export function DocumentsTableShell({
 												)}
 											</div>
 										</div>
-										<RowActions
-											document={doc}
-											deleteDocument={deleteDocument}
-											refreshDocuments={async () => {
-												await onRefresh();
-											}}
-											searchSpaceId={searchSpaceId as string}
-										/>
 									</div>
 								</motion.div>
 							);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
index 3fd4dcac8..88077581c 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@@ -31,12 +31,10 @@ const NON_DELETABLE_DOCUMENT_TYPES = ["SURFSENSE_DOCS"] as const;
 export function RowActions({
 	document,
 	deleteDocument,
-	refreshDocuments,
 	searchSpaceId,
 }: {
 	document: Document;
 	deleteDocument: (id: number) => Promise<boolean>;
-	refreshDocuments: () => Promise<void>;
 	searchSpaceId: string;
 }) {
 	const [isDeleteOpen, setIsDeleteOpen] = useState(false);
@@ -55,9 +53,9 @@ export function RowActions({
 		setIsDeleting(true);
 		try {
 			const ok = await deleteDocument(document.id);
-			if (ok) toast.success("Document deleted successfully");
-			else toast.error("Failed to delete document");
-			await refreshDocuments();
+			if (!ok) toast.error("Failed to delete document");
+			// Note: the mutation atom's onSuccess no longer shows a toast, so no success toast is shown here
+			// The mutation's onSuccess updates the query cache directly, so no refresh is needed
 		} catch (error) {
 			console.error("Error deleting document:", error);
 			toast.error("Failed to delete document");
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
index 269c2ca2f..0e08f7500 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@@ -187,20 +187,6 @@ export default function DocumentsTable() {
 		}
 	}, [debouncedSearch, refetchSearch, refetchDocuments, t, isRefreshing]);
 
-	// Create a delete function for single document deletion
-	const deleteDocument = useCallback(
-		async (id: number) => {
-			try {
-				await deleteDocumentMutation({ id });
-				return true;
-			} catch (error) {
-				console.error("Failed to delete document:", error);
-				return false;
-			}
-		},
-		[deleteDocumentMutation]
-	);
-
 	const onBulkDelete = async () => {
 		if (selectedIds.size === 0) {
 			toast.error(t("no_rows_selected"));
@@ -222,8 +208,7 @@ export default function DocumentsTable() {
 			if (okCount === selectedIds.size)
 				toast.success(t("delete_success_count", { count: okCount }));
 			else toast.error(t("delete_partial_failed"));
-			// Refetch the current page with appropriate method
-			await refreshCurrentView();
+			// Note: No need to call refreshCurrentView() - the mutation already updates the cache
 			setSelectedIds(new Set());
 		} catch (e) {
 			console.error(e);
@@ -282,7 +267,6 @@ export default function DocumentsTable() {
 				selectedIds={selectedIds}
 				setSelectedIds={setSelectedIds}
 				columnVisibility={columnVisibility}
-				deleteDocument={deleteDocument}
 				sortKey={sortKey}
 				sortDesc={sortDesc}
 				onSortChange={handleSortChange}
diff --git a/surfsense_web/atoms/documents/document-mutation.atoms.ts b/surfsense_web/atoms/documents/document-mutation.atoms.ts
index 09e127735..ce077cd4a 100644
--- a/surfsense_web/atoms/documents/document-mutation.atoms.ts
+++ b/surfsense_web/atoms/documents/document-mutation.atoms.ts
@@ -95,7 +95,7 @@ export const deleteDocumentMutationAtom = atomWithMutation((get) => {
 		},
 
 		onSuccess: (_, request: DeleteDocumentRequest) => {
-			toast.success("Document deleted successfully");
+			// Note: No success toast here; each caller shows its own (e.g. page.tsx onBulkDelete reports a count)
 			queryClient.setQueryData(
 				cacheKeys.documents.globalQueryParams(documentsQueryParams),
 				(oldData: GetDocumentsResponse | undefined) => {

From 773b610009c27a10562af87f01d6adc5ed7d756b Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 18:18:43 +0530
Subject: [PATCH 04/36] feat: update DocumentsTableShell to replace Link2 icon
 with Network icon for improved clarity

---
 .../documents/(manage)/components/DocumentsTableShell.tsx     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index 3c9ef2cbc..3f45322ef 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -1,7 +1,7 @@
 "use client";
 
 import { formatDistanceToNow } from "date-fns";
-import { Calendar, ChevronDown, ChevronUp, FileText, FileX, Link2, Plus, User } from "lucide-react";
+import {Calendar, ChevronDown, ChevronUp, FileText, FileX, Network, Plus, User } from "lucide-react";
 import { motion } from "motion/react";
 import { useTranslations } from "next-intl";
 import React, { useRef, useState, useEffect } from "react";
@@ -358,7 +358,7 @@ export function DocumentsTableShell({
 												currentSortKey={sortKey}
 												sortDesc={sortDesc}
 												onSort={onSortHeader}
-												icon={<Link2 size={14} className="text-muted-foreground" />}
+												icon={<Network size={14} className="text-muted-foreground" />}
 											>
 												Source
 											</SortableHeader>

From 6989059e94432d0ed73794b40a4cb79e1d72fc59 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 19:08:25 +0530
Subject: [PATCH 05/36] feat: add OBSIDIAN_CONNECTOR to document type
 enumeration

---
 surfsense_web/contracts/types/document.types.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts
index 01a58173e..c181119d3 100644
--- a/surfsense_web/contracts/types/document.types.ts
+++ b/surfsense_web/contracts/types/document.types.ts
@@ -23,6 +23,7 @@ export const documentTypeEnum = z.enum([
 	"ELASTICSEARCH_CONNECTOR",
 	"BOOKSTACK_CONNECTOR",
 	"CIRCLEBACK",
+	"OBSIDIAN_CONNECTOR",
 	"SURFSENSE_DOCS",
 	"NOTE",
 	"COMPOSIO_GOOGLE_DRIVE_CONNECTOR",

From dec85b641761cfb9e28b289fffc2ac3822088507 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 19:58:47 +0530
Subject: [PATCH 06/36] fix: stabilize sync process by rounding cutoff date to
 midnight UTC and update cleanup logic for user databases

---
 surfsense_web/hooks/use-inbox.ts     |  4 ++
 surfsense_web/lib/electric/client.ts | 82 +++++++++++++++++++++-------
 2 files changed, 67 insertions(+), 19 deletions(-)

diff --git a/surfsense_web/hooks/use-inbox.ts b/surfsense_web/hooks/use-inbox.ts
index 362feb747..56ddb46a4 100644
--- a/surfsense_web/hooks/use-inbox.ts
+++ b/surfsense_web/hooks/use-inbox.ts
@@ -38,10 +38,14 @@ function deduplicateAndSort(items: InboxItem[]): InboxItem[] {
 
 /**
  * Calculate the cutoff date for sync window
+ * IMPORTANT: Rounds to the start of the day (midnight UTC) to ensure stable values
+ * across re-renders. Without this, millisecond differences cause multiple syncs!
  */
 function getSyncCutoffDate(): string {
 	const cutoff = new Date();
 	cutoff.setDate(cutoff.getDate() - SYNC_WINDOW_DAYS);
+	// Round to start of day to prevent millisecond differences causing duplicate syncs
+	cutoff.setUTCHours(0, 0, 0, 0);
 	return cutoff.toISOString();
 }
 
diff --git a/surfsense_web/lib/electric/client.ts b/surfsense_web/lib/electric/client.ts
index 177a66d28..04f76a7f2 100644
--- a/surfsense_web/lib/electric/client.ts
+++ b/surfsense_web/lib/electric/client.ts
@@ -12,7 +12,7 @@
  * 3. Works even if logout cleanup fails
  */
 
-import { PGlite } from "@electric-sql/pglite";
+import { PGlite, type Transaction } from "@electric-sql/pglite";
 import { live } from "@electric-sql/pglite/live";
 import { electricSync } from "@electric-sql/pglite-sync";
 
@@ -56,7 +56,10 @@ const pendingSyncs = new Map<string, Promise<SyncHandle>>();
 // v2: user-specific database architecture
 // v3: consistent cutoff date for sync+queries, visibility refresh support
 // v4: heartbeat-based stale notification detection with updated_at tracking
-const SYNC_VERSION = 4;
+// v5: fixed duplicate key errors (root cause: unstable cutoff dates in use-inbox.ts)
+//     - added onMustRefetch handler for server-side refetch scenarios
+//     - fixed getSyncCutoffDate to use stable midnight UTC timestamps
+const SYNC_VERSION = 5;
 
 // Database name prefix for identifying SurfSense databases
 const DB_PREFIX = "surfsense-";
@@ -77,7 +80,7 @@ function getDbName(userId: string): string {
 }
 
 /**
- * Clean up databases from OTHER users (not the current user)
+ * Clean up databases from OTHER users AND old versions
  * This is called on login to ensure clean state
  */
 async function cleanupOtherUserDatabases(currentUserId: string): Promise<void> {
@@ -85,6 +88,10 @@ async function cleanupOtherUserDatabases(currentUserId: string): Promise<void> {
 		return;
 	}
 
+	// The exact database identifier we want to keep (current user + current version)
+	// Format: "surfsense-{userId}-v{version}"
+	const currentDbIdentifier = `${DB_PREFIX}${currentUserId}-v${SYNC_VERSION}`;
+
 	try {
 		// Try to list all databases (not supported in all browsers)
 		if (typeof window.indexedDB.databases === "function") {
@@ -95,14 +102,15 @@ async function cleanupOtherUserDatabases(currentUserId: string): Promise<void> {
 				if (!dbName) continue;
 
 				// Check if this is a SurfSense database
-				if (dbName.startsWith(DB_PREFIX) || dbName.includes("surfsense")) {
-					// Don't delete current user's database
-					if (dbName.includes(currentUserId)) {
-						console.log(`[Electric] Keeping current user's database: ${dbName}`);
+				if (dbName.includes("surfsense")) {
+					// Check if this is the current database
+					// PGlite stores with "/pglite/" prefix, so we check if the name ENDS WITH our identifier
+					if (dbName.endsWith(currentDbIdentifier)) {
+						console.log(`[Electric] Keeping current database: ${dbName}`);
 						continue;
 					}
 
-					// Delete databases from other users
+					// Delete ALL other databases (other users OR old versions of current user)
 					try {
 						console.log(`[Electric] Deleting stale database: ${dbName}`);
 						window.indexedDB.deleteDatabase(dbName);
@@ -413,7 +421,22 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								}, 5000);
 							});
 
-							// Include userId in shapeKey for user-specific sync state
+							// ROOT CAUSE FIX: The duplicate key errors were caused by unstable cutoff dates
+							// in use-inbox.ts generating different sync keys on each render.
+							// That's now fixed (rounded to midnight UTC in getSyncCutoffDate).
+							// We can safely use shapeKey for fast incremental sync.
+							
+							const shapeKey = `${userId}_v${SYNC_VERSION}_${table}_${where?.replace(/[^a-zA-Z0-9]/g, "_") || "all"}`;
+							
+							// Type assertion to PGlite with electric extension
+							const pgWithElectric = db as unknown as {
+								electric: {
+									syncShapeToTable: (
+										config: Record<string, unknown>
+									) => Promise<{ unsubscribe: () => void; isUpToDate: boolean; stream: unknown }>;
+								};
+							};
+
 							const shapeConfig = {
 								shape: {
 									url: `${electricUrl}/v1/shape`,
@@ -425,7 +448,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								},
 								table,
 								primaryKey,
-								shapeKey: `${userId}_v${SYNC_VERSION}_${table}_${where?.replace(/[^a-zA-Z0-9]/g, "_") || "all"}`, // User-specific versioned key
+								shapeKey, // Re-enabled for fast incremental sync (root cause in use-inbox.ts is fixed)
 								onInitialSync: () => {
 									console.log(
 										`[Electric] ✅ Initial sync complete for ${table} - data should now be in PGlite`
@@ -440,6 +463,36 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 									);
 									rejectInitialSync(error);
 								},
+								// Handle must-refetch: clear table data before Electric re-inserts from scratch
+								// This prevents "duplicate key" errors when the shape is invalidated
+								onMustRefetch: async (tx: Transaction) => {
+									console.log(
+										`[Electric] ⚠️ Must refetch triggered for ${table} - clearing existing data`
+									);
+									try {
+										// Delete rows matching the shape's WHERE clause
+										// If no WHERE clause, delete all rows from the table
+										if (validatedWhere) {
+											// Parse the WHERE clause to build a DELETE statement
+											// The WHERE clause is already validated and formatted
+											await tx.exec(`DELETE FROM ${table} WHERE ${validatedWhere}`);
+											console.log(
+												`[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`
+											);
+										} else {
+											// No WHERE clause means we're syncing the entire table
+											await tx.exec(`DELETE FROM ${table}`);
+											console.log(`[Electric] 🗑️ Cleared all rows from ${table}`);
+										}
+									} catch (cleanupError) {
+										console.error(
+											`[Electric] ❌ Failed to clear ${table} during must-refetch:`,
+											cleanupError
+										);
+										// Re-throw to let Electric handle the error
+										throw cleanupError;
+									}
+								},
 							};
 
 							console.log(
@@ -447,15 +500,6 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								JSON.stringify(shapeConfig, null, 2)
 							);
 
-							// Type assertion to PGlite with electric extension
-							const pgWithElectric = db as PGlite & {
-								electric: {
-									syncShapeToTable: (
-										config: typeof shapeConfig
-									) => Promise<{ unsubscribe: () => void; isUpToDate: boolean; stream: unknown }>;
-								};
-							};
-
 							let shape: { unsubscribe: () => void; isUpToDate: boolean; stream: unknown };
 							try {
 								shape = await pgWithElectric.electric.syncShapeToTable(shapeConfig);

From d0a490fa50a8eb846d08074b80640f6c7d8601fe Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 20:08:13 +0530
Subject: [PATCH 07/36] feat: implement debug logging for development
 environment in Electric client

---
 surfsense_web/lib/electric/client.ts | 113 +++++++++++++++------------
 1 file changed, 62 insertions(+), 51 deletions(-)

diff --git a/surfsense_web/lib/electric/client.ts b/surfsense_web/lib/electric/client.ts
index 04f76a7f2..7ef8f7bbf 100644
--- a/surfsense_web/lib/electric/client.ts
+++ b/surfsense_web/lib/electric/client.ts
@@ -16,6 +16,17 @@ import { PGlite, type Transaction } from "@electric-sql/pglite";
 import { live } from "@electric-sql/pglite/live";
 import { electricSync } from "@electric-sql/pglite-sync";
 
+// Debug logging - only logs in development, silent in production
+const IS_DEV = process.env.NODE_ENV === "development";
+
+function debugLog(...args: unknown[]) {
+	if (IS_DEV) console.log(...args);
+}
+
+function debugWarn(...args: unknown[]) {
+	if (IS_DEV) console.warn(...args);
+}
+
 // Types
 export interface ElectricClient {
 	db: PGlite;
@@ -106,23 +117,23 @@ async function cleanupOtherUserDatabases(currentUserId: string): Promise<void> {
 					// Check if this is the current database
 					// PGlite stores with "/pglite/" prefix, so we check if the name ENDS WITH our identifier
 					if (dbName.endsWith(currentDbIdentifier)) {
-						console.log(`[Electric] Keeping current database: ${dbName}`);
+						debugLog(`[Electric] Keeping current database: ${dbName}`);
 						continue;
 					}
 
 					// Delete ALL other databases (other users OR old versions of current user)
 					try {
-						console.log(`[Electric] Deleting stale database: ${dbName}`);
+						debugLog(`[Electric] Deleting stale database: ${dbName}`);
 						window.indexedDB.deleteDatabase(dbName);
 					} catch (deleteErr) {
-						console.warn(`[Electric] Failed to delete database ${dbName}:`, deleteErr);
+						debugWarn(`[Electric] Failed to delete database ${dbName}:`, deleteErr);
 					}
 				}
 			}
 		}
 	} catch (err) {
 		// indexedDB.databases() not supported - that's okay, login cleanup is best-effort
-		console.warn("[Electric] Could not enumerate databases for cleanup:", err);
+		debugWarn("[Electric] Could not enumerate databases for cleanup:", err);
 	}
 }
 
@@ -148,7 +159,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 
 	// If initialized for a different user, close the old client first
 	if (electricClient && currentUserId !== userId) {
-		console.log(`[Electric] User changed from ${currentUserId} to ${userId}, reinitializing...`);
+		debugLog(`[Electric] User changed from ${currentUserId} to ${userId}, reinitializing...`);
 		await cleanupElectric();
 	}
 
@@ -163,12 +174,12 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 	initPromise = (async () => {
 		try {
 			// STEP 1: Clean up databases from other users (login-time cleanup)
-			console.log("[Electric] Cleaning up databases from other users...");
+			debugLog("[Electric] Cleaning up databases from other users...");
 			await cleanupOtherUserDatabases(userId);
 
 			// STEP 2: Create user-specific PGlite database
 			const dbName = getDbName(userId);
-			console.log(`[Electric] Initializing database: ${dbName}`);
+			debugLog(`[Electric] Initializing database: ${dbName}`);
 
 			const db = await PGlite.create({
 				dataDir: dbName,
@@ -298,14 +309,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 					// Check if we already have an active sync for this shape (memory optimization)
 					const existingHandle = activeSyncHandles.get(cacheKey);
 					if (existingHandle) {
-						console.log(`[Electric] Reusing existing sync handle for: ${cacheKey}`);
+						debugLog(`[Electric] Reusing existing sync handle for: ${cacheKey}`);
 						return existingHandle;
 					}
 
 					// Check if there's already a pending sync for this shape (prevent race condition)
 					const pendingSync = pendingSyncs.get(cacheKey);
 					if (pendingSync) {
-						console.log(`[Electric] Waiting for pending sync to complete: ${cacheKey}`);
+						debugLog(`[Electric] Waiting for pending sync to complete: ${cacheKey}`);
 						return pendingSync;
 					}
 
@@ -331,7 +342,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 
 								if (singleQuoteCount % 2 !== 0) {
 									// Odd number of quotes means unterminated string literal
-									console.warn("Where clause has unmatched quotes, fixing:", where);
+									debugWarn("Where clause has unmatched quotes, fixing:", where);
 									// Add closing quote at the end
 									validatedWhere = `${where}'`;
 									params.where = validatedWhere;
@@ -345,15 +356,15 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 
 						if (columns) params.columns = columns.join(",");
 
-						console.log("[Electric] Syncing shape with params:", params);
-						console.log("[Electric] Electric URL:", `${electricUrl}/v1/shape`);
-						console.log("[Electric] Where clause:", where, "Validated:", validatedWhere);
+						debugLog("[Electric] Syncing shape with params:", params);
+						debugLog("[Electric] Electric URL:", `${electricUrl}/v1/shape`);
+						debugLog("[Electric] Where clause:", where, "Validated:", validatedWhere);
 
 						try {
 							// Debug: Test Electric SQL connection directly first (DEV ONLY - skipped in production)
 							if (process.env.NODE_ENV === "development") {
 								const testUrl = `${electricUrl}/v1/shape?table=${table}&offset=-1${validatedWhere ? `&where=${encodeURIComponent(validatedWhere)}` : ""}`;
-								console.log("[Electric] Testing Electric SQL directly:", testUrl);
+								debugLog("[Electric] Testing Electric SQL directly:", testUrl);
 								try {
 									const testResponse = await fetch(testUrl);
 									const testHeaders = {
@@ -361,9 +372,9 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 										offset: testResponse.headers.get("electric-offset"),
 										upToDate: testResponse.headers.get("electric-up-to-date"),
 									};
-									console.log("[Electric] Direct Electric SQL response headers:", testHeaders);
+									debugLog("[Electric] Direct Electric SQL response headers:", testHeaders);
 									const testData = await testResponse.json();
-									console.log(
+									debugLog(
 										"[Electric] Direct Electric SQL data count:",
 										Array.isArray(testData) ? testData.length : "not array",
 										testData
@@ -404,14 +415,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								// Shorter timeout (5 seconds) as fallback
 								setTimeout(() => {
 									if (!syncResolved) {
-										console.warn(
+										debugWarn(
 											`[Electric] ⚠️ Sync timeout for ${table} - checking isUpToDate one more time...`
 										);
 										// Check isUpToDate one more time before resolving
 										// This will be checked after shape is created
 										setTimeout(() => {
 											if (!syncResolved) {
-												console.warn(
+												debugWarn(
 													`[Electric] ⚠️ Sync timeout for ${table} - resolving anyway after 5s`
 												);
 												resolveInitialSync();
@@ -450,7 +461,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								primaryKey,
 								shapeKey, // Re-enabled for fast incremental sync (root cause in use-inbox.ts is fixed)
 								onInitialSync: () => {
-									console.log(
+									debugLog(
 										`[Electric] ✅ Initial sync complete for ${table} - data should now be in PGlite`
 									);
 									resolveInitialSync();
@@ -466,7 +477,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								// Handle must-refetch: clear table data before Electric re-inserts from scratch
 								// This prevents "duplicate key" errors when the shape is invalidated
 								onMustRefetch: async (tx: Transaction) => {
-									console.log(
+									debugLog(
 										`[Electric] ⚠️ Must refetch triggered for ${table} - clearing existing data`
 									);
 									try {
@@ -476,13 +487,13 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 											// Parse the WHERE clause to build a DELETE statement
 											// The WHERE clause is already validated and formatted
 											await tx.exec(`DELETE FROM ${table} WHERE ${validatedWhere}`);
-											console.log(
+											debugLog(
 												`[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`
 											);
 										} else {
 											// No WHERE clause means we're syncing the entire table
 											await tx.exec(`DELETE FROM ${table}`);
-											console.log(`[Electric] 🗑️ Cleared all rows from ${table}`);
+											debugLog(`[Electric] 🗑️ Cleared all rows from ${table}`);
 										}
 									} catch (cleanupError) {
 										console.error(
@@ -495,7 +506,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								},
 							};
 
-							console.log(
+							debugLog(
 								"[Electric] syncShapeToTable config:",
 								JSON.stringify(shapeConfig, null, 2)
 							);
@@ -508,7 +519,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								const errorMessage =
 									syncError instanceof Error ? syncError.message : String(syncError);
 								if (errorMessage.includes("Already syncing")) {
-									console.warn(
+									debugWarn(
 										`[Electric] Already syncing ${table}, waiting for existing sync to settle...`
 									);
 
@@ -518,12 +529,12 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 									// Check if an active handle now exists (another sync might have completed)
 									const existingHandle = activeSyncHandles.get(cacheKey);
 									if (existingHandle) {
-										console.log(`[Electric] Found existing handle after waiting: ${cacheKey}`);
+										debugLog(`[Electric] Found existing handle after waiting: ${cacheKey}`);
 										return existingHandle;
 									}
 
 									// Retry once after waiting
-									console.log(`[Electric] Retrying sync for ${table}...`);
+									debugLog(`[Electric] Retrying sync for ${table}...`);
 									try {
 										shape = await pgWithElectric.electric.syncShapeToTable(shapeConfig);
 									} catch (retryError) {
@@ -531,12 +542,12 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 											retryError instanceof Error ? retryError.message : String(retryError);
 										if (retryMessage.includes("Already syncing")) {
 											// Still syncing - create a placeholder handle that indicates the table is being synced
-											console.warn(
+											debugWarn(
 												`[Electric] ${table} still syncing, creating placeholder handle`
 											);
 											const placeholderHandle: SyncHandle = {
 												unsubscribe: () => {
-													console.log(`[Electric] Placeholder unsubscribe for: ${cacheKey}`);
+													debugLog(`[Electric] Placeholder unsubscribe for: ${cacheKey}`);
 													activeSyncHandles.delete(cacheKey);
 												},
 												get isUpToDate() {
@@ -560,7 +571,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 							}
 
 							// Log the actual shape result structure
-							console.log("[Electric] Shape sync result (initial):", {
+							debugLog("[Electric] Shape sync result (initial):", {
 								hasUnsubscribe: typeof shape?.unsubscribe === "function",
 								isUpToDate: shape?.isUpToDate,
 								hasStream: !!shape?.stream,
@@ -569,7 +580,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 
 							// Recommended Approach Step 1: Check isUpToDate immediately
 							if (shape.isUpToDate) {
-								console.log(
+								debugLog(
 									`[Electric] ✅ Sync already up-to-date for ${table} (resuming from previous state)`
 								);
 								resolveInitialSync();
@@ -577,7 +588,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								// Recommended Approach Step 2: Subscribe to stream and watch for "up-to-date" message
 								if (shape?.stream) {
 									const stream = shape.stream as any;
-									console.log("[Electric] Shape stream details:", {
+									debugLog("[Electric] Shape stream details:", {
 										shapeHandle: stream?.shapeHandle,
 										lastOffset: stream?.lastOffset,
 										isUpToDate: stream?.isUpToDate,
@@ -590,14 +601,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 									// NOTE: We keep this subscription active - don't unsubscribe!
 									// The stream is what Electric SQL uses for real-time updates
 									if (typeof stream?.subscribe === "function") {
-										console.log(
+										debugLog(
 											"[Electric] Subscribing to shape stream to watch for up-to-date message..."
 										);
 										// Subscribe but don't store unsubscribe - we want it to stay active
 										stream.subscribe((messages: unknown[]) => {
 											// Continue receiving updates even after sync is resolved
 											if (!syncResolved) {
-												console.log(
+												debugLog(
 													"[Electric] 🔵 Shape stream received messages:",
 													messages?.length || 0
 												);
@@ -614,14 +625,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 														(typeof msg === "object" && "up-to-date" in msg)
 													) {
 														if (!syncResolved) {
-															console.log(`[Electric] ✅ Received up-to-date message for ${table}`);
+															debugLog(`[Electric] ✅ Received up-to-date message for ${table}`);
 															resolveInitialSync();
 														}
 														// Continue listening for real-time updates - don't return!
 													}
 												}
 												if (!syncResolved && messages.length > 0) {
-													console.log(
+													debugLog(
 														"[Electric] First message:",
 														JSON.stringify(messages[0], null, 2)
 													);
@@ -630,14 +641,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 
 											// Also check stream's isUpToDate property after receiving messages
 											if (!syncResolved && stream?.isUpToDate) {
-												console.log(`[Electric] ✅ Stream isUpToDate is true for ${table}`);
+												debugLog(`[Electric] ✅ Stream isUpToDate is true for ${table}`);
 												resolveInitialSync();
 											}
 										});
 
 										// Also check stream's isUpToDate property immediately
 										if (stream?.isUpToDate) {
-											console.log(
+											debugLog(
 												`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`
 											);
 											resolveInitialSync();
@@ -652,7 +663,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 										}
 
 										if (shape.isUpToDate || stream?.isUpToDate) {
-											console.log(
+											debugLog(
 												`[Electric] ✅ Sync completed (detected via polling) for ${table}`
 											);
 											clearInterval(pollInterval);
@@ -665,7 +676,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 										clearInterval(pollInterval);
 									});
 								} else {
-									console.warn(
+									debugWarn(
 										`[Electric] ⚠️ No stream available for ${table}, relying on callback and timeout`
 									);
 								}
@@ -674,7 +685,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 							// Create the sync handle with proper cleanup
 							const syncHandle: SyncHandle = {
 								unsubscribe: () => {
-									console.log(`[Electric] Unsubscribing from: ${cacheKey}`);
+									debugLog(`[Electric] Unsubscribing from: ${cacheKey}`);
 									// Remove from cache first
 									activeSyncHandles.delete(cacheKey);
 									// Then unsubscribe from the shape
@@ -692,7 +703,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 
 							// Cache the sync handle for reuse (memory optimization)
 							activeSyncHandles.set(cacheKey, syncHandle);
-							console.log(
+							debugLog(
 								`[Electric] Cached sync handle for: ${cacheKey} (total cached: ${activeSyncHandles.size})`
 							);
 
@@ -704,7 +715,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								const response = await fetch(`${electricUrl}/v1/shape?table=${table}&offset=-1`, {
 									method: "GET",
 								});
-								console.log(
+								debugLog(
 									"[Electric] Electric SQL server response:",
 									response.status,
 									response.statusText
@@ -726,14 +737,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 					// Clean up the pending sync when done (whether success or failure)
 					syncPromise.finally(() => {
 						pendingSyncs.delete(cacheKey);
-						console.log(`[Electric] Pending sync removed for: ${cacheKey}`);
+						debugLog(`[Electric] Pending sync removed for: ${cacheKey}`);
 					});
 
 					return syncPromise;
 				},
 			};
 
-			console.log(`[Electric] ✅ Initialized successfully for user: ${userId}`);
+			debugLog(`[Electric] ✅ Initialized successfully for user: ${userId}`);
 			return electricClient;
 		} catch (error) {
 			console.error("[Electric] Failed to initialize:", error);
@@ -759,10 +770,10 @@ export async function cleanupElectric(): Promise<void> {
 	}
 
 	const userIdToClean = currentUserId;
-	console.log(`[Electric] Cleaning up for user: ${userIdToClean}`);
+	debugLog(`[Electric] Cleaning up for user: ${userIdToClean}`);
 
 	// Unsubscribe from all active sync handles first (memory cleanup)
-	console.log(`[Electric] Unsubscribing from ${activeSyncHandles.size} active sync handles`);
+	debugLog(`[Electric] Unsubscribing from ${activeSyncHandles.size} active sync handles`);
 	// Copy keys to array to avoid mutation during iteration
 	const handleKeys = Array.from(activeSyncHandles.keys());
 	for (const key of handleKeys) {
@@ -771,7 +782,7 @@ export async function cleanupElectric(): Promise<void> {
 			try {
 				handle.unsubscribe();
 			} catch (err) {
-				console.warn(`[Electric] Failed to unsubscribe from ${key}:`, err);
+				debugWarn(`[Electric] Failed to unsubscribe from ${key}:`, err);
 			}
 		}
 	}
@@ -782,7 +793,7 @@ export async function cleanupElectric(): Promise<void> {
 	try {
 		// Close the PGlite database connection
 		await electricClient.db.close();
-		console.log("[Electric] Database closed");
+		debugLog("[Electric] Database closed");
 	} catch (error) {
 		console.error("[Electric] Error closing database:", error);
 	}
@@ -798,13 +809,13 @@ export async function cleanupElectric(): Promise<void> {
 		try {
 			const dbName = `${DB_PREFIX}${userIdToClean}-v${SYNC_VERSION}`;
 			window.indexedDB.deleteDatabase(dbName);
-			console.log(`[Electric] Deleted database: ${dbName}`);
+			debugLog(`[Electric] Deleted database: ${dbName}`);
 		} catch (err) {
-			console.warn("[Electric] Failed to delete database:", err);
+			debugWarn("[Electric] Failed to delete database:", err);
 		}
 	}
 
-	console.log("[Electric] Cleanup complete");
+	debugLog("[Electric] Cleanup complete");
 }
 
 /**

From 5e4dce40bd084fca92bf123077f6d7a765dad46b Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 20:13:33 +0530
Subject: [PATCH 08/36] feat: add loading state for chat components and
 implement skeleton loading UI

---
 .../layout/providers/LayoutDataProvider.tsx   |  3 +-
 .../layout/ui/shell/LayoutShell.tsx           |  4 +++
 .../layout/ui/sidebar/MobileSidebar.tsx       |  3 ++
 .../components/layout/ui/sidebar/Sidebar.tsx  | 32 +++++++++++++++++--
 4 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
index 2f71adad9..b87cc4883 100644
--- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
+++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
@@ -88,7 +88,7 @@ export function LayoutDataProvider({
 	});
 
 	// Fetch threads (40 total to allow up to 20 per section - shared/private)
-	const { data: threadsData } = useQuery({
+	const { data: threadsData, isLoading: isLoadingThreads } = useQuery({ // isLoading, not isPending: while `enabled` is false the query stays pending without ever fetching
 		queryKey: ["threads", searchSpaceId, { limit: 40 }],
 		queryFn: () => fetchThreads(Number(searchSpaceId), 40),
 		enabled: !!searchSpaceId,
@@ -547,6 +547,7 @@ export function LayoutDataProvider({
 				theme={theme}
 				setTheme={setTheme}
 				isChatPage={isChatPage}
+				isLoadingChats={isLoadingThreads}
 				inbox={{
 					isOpen: isInboxSidebarOpen,
 					onOpenChange: setIsInboxSidebarOpen,
diff --git a/surfsense_web/components/layout/ui/shell/LayoutShell.tsx b/surfsense_web/components/layout/ui/shell/LayoutShell.tsx
index 3624c90a3..a33149669 100644
--- a/surfsense_web/components/layout/ui/shell/LayoutShell.tsx
+++ b/surfsense_web/components/layout/ui/shell/LayoutShell.tsx
@@ -73,6 +73,7 @@ interface LayoutShellProps {
 	className?: string;
 	// Inbox props
 	inbox?: InboxProps;
+	isLoadingChats?: boolean;
 }
 
 export function LayoutShell({
@@ -108,6 +109,7 @@ export function LayoutShell({
 	children,
 	className,
 	inbox,
+	isLoadingChats = false,
 }: LayoutShellProps) {
 	const isMobile = useIsMobile();
 	const [mobileMenuOpen, setMobileMenuOpen] = useState(false);
@@ -159,6 +161,7 @@ export function LayoutShell({
 							pageUsage={pageUsage}
 							theme={theme}
 							setTheme={setTheme}
+							isLoadingChats={isLoadingChats}
 						/>
 
 						<main className={cn("flex-1", isChatPage ? "overflow-hidden" : "overflow-auto")}>
@@ -228,6 +231,7 @@ export function LayoutShell({
 							theme={theme}
 							setTheme={setTheme}
 							className="hidden md:flex border-r shrink-0"
+							isLoadingChats={isLoadingChats}
 						/>
 
 						{/* Docked Inbox Sidebar - renders as flex sibling between sidebar and content */}
diff --git a/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx
index 85f907611..3e9d624c9 100644
--- a/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/MobileSidebar.tsx
@@ -36,6 +36,7 @@ interface MobileSidebarProps {
 	pageUsage?: PageUsage;
 	theme?: string;
 	setTheme?: (theme: "light" | "dark" | "system") => void;
+	isLoadingChats?: boolean;
 }
 
 export function MobileSidebarTrigger({ onClick }: { onClick: () => void }) {
@@ -76,6 +77,7 @@ export function MobileSidebar({
 	pageUsage,
 	theme,
 	setTheme,
+	isLoadingChats = false,
 }: MobileSidebarProps) {
 	const handleSearchSpaceSelect = (id: number) => {
 		onSearchSpaceSelect(id);
@@ -155,6 +157,7 @@ export function MobileSidebar({
 						theme={theme}
 						setTheme={setTheme}
 						className="w-full border-none"
+						isLoadingChats={isLoadingChats}
 					/>
 				</div>
 			</SheetContent>
diff --git a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
index db04bf6dc..fb29448c5 100644
--- a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
@@ -3,6 +3,7 @@
 import { FolderOpen, PenSquare } from "lucide-react";
 import { useTranslations } from "next-intl";
 import { Button } from "@/components/ui/button";
+import { Skeleton } from "@/components/ui/skeleton";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import { cn } from "@/lib/utils";
 import type { ChatItem, NavItem, PageUsage, SearchSpace, User } from "../../types/layout.types";
@@ -14,6 +15,15 @@ import { SidebarHeader } from "./SidebarHeader";
 import { SidebarSection } from "./SidebarSection";
 import { SidebarUserProfile } from "./SidebarUserProfile";
 
+function ChatListItemSkeleton() { // Placeholder row (small square + text bar) standing in for one chat list item
+	return (
+		<div className="flex w-full items-center gap-2 rounded-md p-2">
+			<Skeleton className="h-4 w-4 shrink-0 rounded" />
+			<Skeleton className="h-4 w-full max-w-[180px]" />
+		</div>
+	);
+}
+
 interface SidebarProps {
 	searchSpace: SearchSpace | null;
 	isCollapsed?: boolean;
@@ -38,6 +48,7 @@ interface SidebarProps {
 	theme?: string;
 	setTheme?: (theme: "light" | "dark" | "system") => void;
 	className?: string;
+	isLoadingChats?: boolean;
 }
 
 export function Sidebar({
@@ -64,6 +75,7 @@ export function Sidebar({
 	theme,
 	setTheme,
 	className,
+	isLoadingChats = false,
 }: SidebarProps) {
 	const t = useTranslations("sidebar");
 
@@ -151,7 +163,15 @@ export function Sidebar({
 							) : undefined
 						}
 					>
-						{sharedChats.length > 0 ? (
+						{isLoadingChats ? (
+							<div className="flex flex-col gap-0.5">
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+							</div>
+						) : sharedChats.length > 0 ? (
 							<div className="relative min-h-0 flex-1">
 								<div
 									className={`flex flex-col gap-0.5 max-h-full overflow-y-auto scrollbar-thin scrollbar-thumb-muted-foreground/20 scrollbar-track-transparent ${sharedChats.length > 4 ? "pb-8" : ""}`}
@@ -203,7 +223,15 @@ export function Sidebar({
 							) : undefined
 						}
 					>
-						{chats.length > 0 ? (
+						{isLoadingChats ? (
+							<div className="flex flex-col gap-0.5">
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+								<ChatListItemSkeleton />
+							</div>
+						) : chats.length > 0 ? (
 							<div className="relative flex-1 min-h-0">
 								<div
 									className={`flex flex-col gap-0.5 h-full overflow-y-auto scrollbar-thin scrollbar-thumb-muted-foreground/20 scrollbar-track-transparent ${chats.length > 4 ? "pb-8" : ""}`}

From 1cb578cffbf9d5c542e209e37bc877a1c7d10b23 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 20:30:18 +0530
Subject: [PATCH 09/36] refactor: update DocumentsTableShell and
 DocumentTypeIcon for improved layout and truncation handling

---
 .../(manage)/components/DocumentTypeIcon.tsx  | 26 ++++++++++++-------
 .../components/DocumentsTableShell.tsx        |  8 +++---
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
index b5d434e92..2bba85085 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
@@ -1,6 +1,7 @@
 "use client";
 
 import type React from "react";
+import { useRef, useState, useEffect } from "react";
 import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 
@@ -15,26 +16,33 @@ export function getDocumentTypeLabel(type: string): string {
 		.join(" ");
 }
 
-const MAX_LABEL_LENGTH = 28;
-
 export function DocumentTypeChip({ type, className }: { type: string; className?: string }) {
 	const icon = getDocumentTypeIcon(type, "h-4 w-4");
 	const fullLabel = getDocumentTypeLabel(type);
-	const truncatedLabel = fullLabel.length > MAX_LABEL_LENGTH 
-		? `${fullLabel.slice(0, MAX_LABEL_LENGTH)}...` 
-		: fullLabel;
-	const needsTruncation = fullLabel.length > MAX_LABEL_LENGTH;
+	const textRef = useRef<HTMLSpanElement>(null);
+	const [isTruncated, setIsTruncated] = useState(false);
+
+	useEffect(() => {
+		const checkTruncation = () => {
+			if (textRef.current) {
+				setIsTruncated(textRef.current.scrollWidth > textRef.current.clientWidth); // content wider than its box => the label is clipped
+			}
+		};
+		checkTruncation(); // initial measurement right after mount
+		window.addEventListener("resize", checkTruncation);
+		return () => window.removeEventListener("resize", checkTruncation);
+	}, []); // NOTE(review): empty deps - a chip whose `type` changes after mount is not re-measured until the window resizes; confirm that is acceptable
 
 	const chip = (
 		<span
-			className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground ${className ?? ""}`}
+			className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground max-w-full overflow-hidden ${className ?? ""}`}
 		>
 			<span className="opacity-80 flex-shrink-0">{icon}</span>
-			<span className="truncate">{truncatedLabel}</span>
+			<span ref={textRef} className="truncate min-w-0">{fullLabel}</span>
 		</span>
 	);
 
-	if (needsTruncation) {
+	if (isTruncated) {
 		return (
 			<Tooltip>
 				<TooltipTrigger asChild>{chip}</TooltipTrigger>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index 3f45322ef..7e5f95af4 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -206,7 +206,7 @@ export function DocumentsTableShell({
 										<Skeleton className="h-3 w-20" />
 									</TableHead>
 									{columnVisibility.document_type && (
-										<TableHead className="w-44 border-r border-border/30">
+										<TableHead className="w-[20%] min-w-[120px] max-w-[200px] border-r border-border/30">
 											<Skeleton className="h-3 w-14" />
 										</TableHead>
 									)}
@@ -243,7 +243,7 @@ export function DocumentsTableShell({
 												/>
 											</TableCell>
 											{columnVisibility.document_type && (
-												<TableCell className="w-44 py-2.5 border-r border-border/30">
+												<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/30 overflow-hidden">
 													<Skeleton className="h-5 w-24 rounded" />
 												</TableCell>
 											)}
@@ -352,7 +352,7 @@ export function DocumentsTableShell({
 										</SortableHeader>
 									</TableHead>
 									{columnVisibility.document_type && (
-										<TableHead className="w-44 border-r border-border/30">
+										<TableHead className="w-[20%] min-w-[120px] max-w-[200px] border-r border-border/30">
 											<SortableHeader
 												sortKey="document_type"
 												currentSortKey={sortKey}
@@ -452,7 +452,7 @@ export function DocumentsTableShell({
 													/>
 												</TableCell>
 												{columnVisibility.document_type && (
-													<TableCell className="w-44 py-2.5 border-r border-border/30">
+													<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/30 overflow-hidden">
 														<DocumentTypeChip type={doc.document_type} />
 													</TableCell>
 												)}

From c19aa5fa997c4837ca620476939100a65bb57450 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 22:25:42 +0530
Subject: [PATCH 10/36] feat: implement real-time document updates and lazy
 loading for document content in DocumentsTable and DocumentsTableShell

---
 .../components/DocumentsTableShell.tsx        | 219 +++++----
 .../documents/(manage)/components/types.ts    |   5 +-
 .../documents/(manage)/page.tsx               | 161 +++----
 .../documents/document-mutation.atoms.ts      |   2 +-
 .../assistant-ui/connector-popup.tsx          |   6 +-
 .../contracts/types/document.types.ts         |   2 +
 surfsense_web/hooks/use-documents-electric.ts | 185 --------
 surfsense_web/hooks/use-documents.ts          | 427 ++++++++++++++++++
 surfsense_web/lib/electric/client.ts          |   8 +-
 9 files changed, 655 insertions(+), 360 deletions(-)
 delete mode 100644 surfsense_web/hooks/use-documents-electric.ts
 create mode 100644 surfsense_web/hooks/use-documents.ts

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index 7e5f95af4..dd32a3b78 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -1,15 +1,21 @@
 "use client";
 
 import { formatDistanceToNow } from "date-fns";
-import {Calendar, ChevronDown, ChevronUp, FileText, FileX, Network, Plus, User } from "lucide-react";
+import { Calendar, ChevronDown, ChevronUp, FileText, FileX, Loader2, Network, Plus, User } from "lucide-react";
 import { motion } from "motion/react";
 import { useTranslations } from "next-intl";
-import React, { useRef, useState, useEffect } from "react";
+import React, { useRef, useState, useEffect, useCallback } from "react";
 import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
-import { DocumentViewer } from "@/components/document-viewer";
 import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
+import { MarkdownViewer } from "@/components/markdown-viewer";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
+import {
+	Dialog,
+	DialogContent,
+	DialogHeader,
+	DialogTitle,
+} from "@/components/ui/dialog";
 import { Skeleton } from "@/components/ui/skeleton";
 import {
 	Table,
@@ -20,6 +26,7 @@ import {
 	TableRow,
 } from "@/components/ui/table";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
+import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { DocumentTypeChip } from "./DocumentTypeIcon";
 import type { ColumnVisibility, Document } from "./types";
 
@@ -153,6 +160,42 @@ export function DocumentsTableShell({
 	// State for metadata viewer (opened via Ctrl/Cmd+Click)
 	const [metadataDoc, setMetadataDoc] = useState<Document | null>(null);
 
+	// Lazy viewer state: real-time synced rows carry no content, so it is fetched on demand.
+	const [viewingDoc, setViewingDoc] = useState<Document | null>(null);
+	const [viewingContent, setViewingContent] = useState<string>("");
+	const [viewingLoading, setViewingLoading] = useState(false);
+	const viewRequestRef = useRef(0); // bumped on every open/close; a fetch is stale once its id no longer matches
+
+	// Open the viewer for `doc`, fetching its content unless the row already has it.
+	const handleViewDocument = useCallback(async (doc: Document) => {
+		const requestId = ++viewRequestRef.current;
+		setViewingDoc(doc);
+		if (doc.content) {
+			setViewingContent(doc.content);
+			setViewingLoading(false);
+			return;
+		}
+		setViewingContent(""); // never show another document's text while this one loads
+		setViewingLoading(true);
+		try {
+			const fullDoc = await documentsApiService.getDocument({ id: doc.id });
+			if (viewRequestRef.current === requestId) setViewingContent(fullDoc.content ?? "");
+		} catch (err) {
+			console.error("[DocumentsTableShell] Failed to fetch document content:", err);
+			if (viewRequestRef.current === requestId) setViewingContent("Failed to load document content.");
+		} finally {
+			if (viewRequestRef.current === requestId) setViewingLoading(false);
+		}
+	}, []);
+
+	// Close the viewer and invalidate any fetch that is still in flight.
+	const handleCloseViewer = useCallback(() => {
+		viewRequestRef.current += 1;
+		setViewingDoc(null);
+		setViewingContent("");
+		setViewingLoading(false);
+	}, []);
+
 	const sorted = React.useMemo(
 		() => sortDocuments(documents, sortKey, sortDesc),
 		[documents, sortKey, sortDesc]
@@ -185,7 +228,7 @@ export function DocumentsTableShell({
 
 	return (
 		<motion.div
-			className="rounded-lg border border-border/30 bg-background overflow-hidden"
+			className="rounded-lg border border-border/40 bg-background overflow-hidden"
 			initial={{ opacity: 0, y: 20 }}
 			animate={{ opacity: 1, y: 0 }}
 			transition={{ type: "spring", stiffness: 300, damping: 30, delay: 0.2 }}
@@ -196,22 +239,22 @@ export function DocumentsTableShell({
 					<div className="hidden md:flex md:flex-col">
 						<Table className="table-fixed w-full">
 							<TableHeader>
-								<TableRow className="hover:bg-transparent border-b border-border/30">
-									<TableHead className="w-8 px-0 text-center border-r border-border/30">
+								<TableRow className="hover:bg-transparent border-b border-border/40">
+									<TableHead className="w-8 px-0 text-center border-r border-border/40">
 										<div className="flex items-center justify-center h-full">
 											<Skeleton className="h-4 w-4 rounded" />
 										</div>
 									</TableHead>
-									<TableHead className="w-[35%] max-w-0 border-r border-border/30">
+									<TableHead className="w-[35%] max-w-0 border-r border-border/40">
 										<Skeleton className="h-3 w-20" />
 									</TableHead>
 									{columnVisibility.document_type && (
-										<TableHead className="w-[20%] min-w-[120px] max-w-[200px] border-r border-border/30">
+										<TableHead className="w-[20%] min-w-[120px] max-w-[200px] border-r border-border/40">
 											<Skeleton className="h-3 w-14" />
 										</TableHead>
 									)}
 									{columnVisibility.created_by && (
-										<TableHead className="w-36 border-r border-border/30">
+										<TableHead className="w-36 border-r border-border/40">
 											<Skeleton className="h-3 w-10" />
 										</TableHead>
 									)}
@@ -229,26 +272,26 @@ export function DocumentsTableShell({
 									{[65, 80, 45, 72, 55, 88, 40, 60, 50, 75].map((widthPercent, index) => (
 										<TableRow
 											key={`skeleton-${index}`}
-											className="border-b border-border/30 hover:bg-transparent"
+											className="border-b border-border/40 hover:bg-transparent"
 										>
-											<TableCell className="w-8 px-0 py-2.5 text-center border-r border-border/30">
+											<TableCell className="w-8 px-0 py-2.5 text-center border-r border-border/40">
 												<div className="flex items-center justify-center h-full">
 													<Skeleton className="h-4 w-4 rounded" />
 												</div>
 											</TableCell>
-											<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/30">
+											<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40">
 												<Skeleton
 													className="h-4"
 													style={{ width: `${widthPercent}%` }}
 												/>
 											</TableCell>
 											{columnVisibility.document_type && (
-												<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/30 overflow-hidden">
+												<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden">
 													<Skeleton className="h-5 w-24 rounded" />
 												</TableCell>
 											)}
 											{columnVisibility.created_by && (
-												<TableCell className="w-36 py-2.5 truncate border-r border-border/30">
+												<TableCell className="w-36 py-2.5 truncate border-r border-border/40">
 													<Skeleton className="h-4 w-20" />
 												</TableCell>
 											)}
@@ -329,8 +372,8 @@ export function DocumentsTableShell({
 						{/* Fixed Header */}
 						<Table className="table-fixed w-full">
 							<TableHeader>
-								<TableRow className="hover:bg-transparent border-b border-border/30">
-									<TableHead className="w-8 px-0 text-center border-r border-border/30">
+								<TableRow className="hover:bg-transparent border-b border-border/40">
+									<TableHead className="w-8 px-0 text-center border-r border-border/40">
 										<div className="flex items-center justify-center h-full">
 											<Checkbox
 												checked={allSelectedOnPage || (someSelectedOnPage && "indeterminate")}
@@ -340,7 +383,7 @@ export function DocumentsTableShell({
 											/>
 										</div>
 									</TableHead>
-									<TableHead className="w-[35%] border-r border-border/30">
+									<TableHead className="w-[35%] border-r border-border/40">
 										<SortableHeader
 											sortKey="title"
 											currentSortKey={sortKey}
@@ -352,7 +395,7 @@ export function DocumentsTableShell({
 										</SortableHeader>
 									</TableHead>
 									{columnVisibility.document_type && (
-										<TableHead className="w-[20%] min-w-[120px] max-w-[200px] border-r border-border/30">
+										<TableHead className="w-[20%] min-w-[120px] max-w-[200px] border-r border-border/40">
 											<SortableHeader
 												sortKey="document_type"
 												currentSortKey={sortKey}
@@ -365,7 +408,7 @@ export function DocumentsTableShell({
 										</TableHead>
 									)}
 									{columnVisibility.created_by && (
-										<TableHead className="w-36 border-r border-border/30">
+										<TableHead className="w-36 border-r border-border/40">
 											<span className="flex items-center gap-1.5 text-sm font-medium text-muted-foreground/70">
 												<User size={14} className="opacity-60 text-muted-foreground" />
 												User
@@ -406,13 +449,13 @@ export function DocumentsTableShell({
 														delay: index * 0.02,
 													},
 												}}
-												className={`border-b border-border/30 transition-colors ${
+												className={`border-b border-border/40 transition-colors ${
 													isSelected
 														? "bg-primary/5 hover:bg-primary/8"
 														: "hover:bg-muted/30"
 												}`}
 											>
-												<TableCell className="w-8 px-0 py-2.5 text-center border-r border-border/30">
+												<TableCell className="w-8 px-0 py-2.5 text-center border-r border-border/40">
 													<div className="flex items-center justify-center h-full">
 														<Checkbox
 															checked={isSelected}
@@ -422,42 +465,42 @@ export function DocumentsTableShell({
 														/>
 													</div>
 												</TableCell>
-												<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/30">
-													<DocumentViewer
-														title={doc.title}
-														content={doc.content}
-														trigger={
-															<button
-																type="button"
-																className="block w-full text-left text-sm text-foreground hover:text-foreground transition-colors cursor-pointer bg-transparent border-0 p-0 truncate"
-																onClick={(e) => {
-																	// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
-																	if (e.ctrlKey || e.metaKey) {
-																		e.preventDefault();
-																		e.stopPropagation();
-																		setMetadataDoc(doc);
-																	}
-																}}
-																onKeyDown={(e) => {
-																	// Ctrl/Cmd + Enter opens metadata
-																	if ((e.ctrlKey || e.metaKey) && e.key === "Enter") {
-																		e.preventDefault();
-																		setMetadataDoc(doc);
-																	}
-																}}
-															>
-																<TruncatedText text={title} className="truncate block" />
-															</button>
-														}
-													/>
+												<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40">
+													<button
+														type="button"
+														className="block w-full text-left text-sm text-foreground hover:text-foreground transition-colors cursor-pointer bg-transparent border-0 p-0 truncate"
+														onClick={(e) => {
+															// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
+															if (e.ctrlKey || e.metaKey) {
+																e.preventDefault();
+																e.stopPropagation();
+																setMetadataDoc(doc);
+															} else {
+																// Normal click opens document viewer (lazy loads content)
+																handleViewDocument(doc);
+															}
+														}}
+														onKeyDown={(e) => {
+															// Ctrl/Cmd + Enter opens metadata; plain Enter opens the document viewer
+															if ((e.ctrlKey || e.metaKey) && e.key === "Enter") {
+																e.preventDefault();
+																setMetadataDoc(doc);
+															} else if (e.key === "Enter") {
+																e.preventDefault(); // a <button> also fires click on Enter; cancel it so onClick does not open (and fetch) a second time
+																handleViewDocument(doc);
+															}
+														}}
+													>
+														<TruncatedText text={title} className="truncate block" />
+													</button>
 												</TableCell>
 												{columnVisibility.document_type && (
-													<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/30 overflow-hidden">
+													<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden">
 														<DocumentTypeChip type={doc.document_type} />
 													</TableCell>
 												)}
 												{columnVisibility.created_by && (
-													<TableCell className="w-36 py-2.5 text-sm text-foreground truncate border-r border-border/30">
+													<TableCell className="w-36 py-2.5 text-sm text-foreground truncate border-r border-border/40">
 														{doc.created_by_name || "—"}
 													</TableCell>
 												)}
@@ -482,7 +525,7 @@ export function DocumentsTableShell({
 					</div>
 
 					{/* Mobile Card View - Notion Style */}
-					<div className="md:hidden divide-y divide-border/30 h-[50vh] overflow-auto">
+					<div className="md:hidden divide-y divide-border/40 h-[50vh] overflow-auto">
 						{sorted.map((doc, index) => {
 							const isSelected = selectedIds.has(doc.id);
 							return (
@@ -502,33 +545,33 @@ export function DocumentsTableShell({
 											className="border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary"
 										/>
 										<div className="flex-1 min-w-0 space-y-1.5">
-											<DocumentViewer
-												title={doc.title}
-												content={doc.content}
-												trigger={
-													<button
-														type="button"
-														className="text-left text-sm text-foreground hover:text-foreground transition-colors cursor-pointer truncate block w-full bg-transparent border-0 p-0"
-														onClick={(e) => {
-															// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
-															if (e.ctrlKey || e.metaKey) {
-																e.preventDefault();
-																e.stopPropagation();
-																setMetadataDoc(doc);
-															}
-														}}
-														onKeyDown={(e) => {
-															// Ctrl/Cmd + Enter opens metadata
-															if ((e.ctrlKey || e.metaKey) && e.key === "Enter") {
-																e.preventDefault();
-																setMetadataDoc(doc);
-															}
-														}}
-													>
-														{doc.title}
-													</button>
-												}
-											/>
+											<button
+												type="button"
+												className="text-left text-sm text-foreground hover:text-foreground transition-colors cursor-pointer truncate block w-full bg-transparent border-0 p-0"
+												onClick={(e) => {
+													// Ctrl (Win/Linux) or Cmd (Mac) + Click opens metadata
+													if (e.ctrlKey || e.metaKey) {
+														e.preventDefault();
+														e.stopPropagation();
+														setMetadataDoc(doc);
+													} else {
+														// Normal click opens document viewer (lazy loads content)
+														handleViewDocument(doc);
+													}
+												}}
+												onKeyDown={(e) => {
+													// Ctrl/Cmd + Enter opens metadata; plain Enter opens the document viewer
+													if ((e.ctrlKey || e.metaKey) && e.key === "Enter") {
+														e.preventDefault();
+														setMetadataDoc(doc);
+													} else if (e.key === "Enter") {
+														e.preventDefault(); // a <button> also fires click on Enter; cancel it so onClick does not open (and fetch) a second time
+														handleViewDocument(doc);
+													}
+												}}
+											>
+												{doc.title}
+											</button>
 											<div className="flex flex-wrap items-center gap-2">
 												<DocumentTypeChip type={doc.document_type} />
 												{columnVisibility.created_by && doc.created_by_name && (
@@ -567,6 +610,24 @@ export function DocumentsTableShell({
 					if (!open) setMetadataDoc(null);
 				}}
 			/>
+
+			{/* Document Content Viewer - lazy loads content on-demand */}
+			<Dialog open={!!viewingDoc} onOpenChange={(open) => !open && handleCloseViewer()}>
+				<DialogContent className="max-w-4xl max-h-[80vh] overflow-y-auto">
+					<DialogHeader>
+						<DialogTitle>{viewingDoc?.title}</DialogTitle>
+					</DialogHeader>
+					<div className="mt-4">
+						{viewingLoading ? (
+							<div className="flex items-center justify-center py-12">
+								<Loader2 className="h-8 w-8 animate-spin text-muted-foreground" />
+							</div>
+						) : (
+							<MarkdownViewer content={viewingContent} />
+						)}
+					</div>
+				</DialogContent>
+			</Dialog>
 		</motion.div>
 	);
 }
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
index b52054dcd..5485be0ef 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
@@ -4,8 +4,9 @@ export type Document = {
 	id: number;
 	title: string;
 	document_type: DocumentType;
-	document_metadata: any;
-	content: string;
+	// Optional: Only needed when viewing document details (lazy loaded)
+	document_metadata?: any;
+	content?: string;
 	created_at: string;
 	search_space_id: number;
 	created_by_id?: string | null;
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
index 0e08f7500..31c95e5e6 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@@ -8,8 +8,8 @@ import { useTranslations } from "next-intl";
 import { useCallback, useEffect, useMemo, useState } from "react";
 import { toast } from "sonner";
 import { deleteDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms";
-import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms";
 import type { DocumentTypeEnum } from "@/contracts/types/document.types";
+import { useDocuments } from "@/hooks/use-documents";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { cacheKeys } from "@/lib/query-client/cache-keys";
 import { DocumentsFilters } from "./components/DocumentsFilters";
@@ -43,21 +43,20 @@ export default function DocumentsTable() {
 	const [sortKey, setSortKey] = useState<SortKey>("created_at");
 	const [sortDesc, setSortDesc] = useState(true);
 	const [selectedIds, setSelectedIds] = useState<Set<number>>(new Set());
-	const { data: rawTypeCounts } = useAtomValue(documentTypeCountsAtom);
 	const { mutateAsync: deleteDocumentMutation } = useAtomValue(deleteDocumentMutationAtom);
 
-	// Build query parameters for fetching documents
-	const queryParams = useMemo(
-		() => ({
-			search_space_id: searchSpaceId,
-			page: pageIndex,
-			page_size: PAGE_SIZE,
-			...(activeTypes.length > 0 && { document_types: activeTypes }),
-		}),
-		[searchSpaceId, pageIndex, activeTypes]
-	);
+	// REAL-TIME: Use Electric SQL hook for live document updates (when not searching)
+	const {
+		documents: realtimeDocuments,
+		typeCounts: realtimeTypeCounts,
+		loading: realtimeLoading,
+		error: realtimeError,
+	} = useDocuments(searchSpaceId, activeTypes);
 
-	// Build search query parameters
+	// Check if we're in search mode
+	const isSearchMode = !!debouncedSearch.trim();
+
+	// Build search query parameters (only used when searching)
 	const searchQueryParams = useMemo(
 		() => ({
 			search_space_id: searchSpaceId,
@@ -69,20 +68,7 @@ export default function DocumentsTable() {
 		[searchSpaceId, pageIndex, activeTypes, debouncedSearch]
 	);
 
-	// Use query for fetching documents
-	const {
-		data: documentsResponse,
-		isLoading: isDocumentsLoading,
-		refetch: refetchDocuments,
-		error: documentsError,
-	} = useQuery({
-		queryKey: cacheKeys.documents.globalQueryParams(queryParams),
-		queryFn: () => documentsApiService.getDocuments({ queryParams }),
-		staleTime: 3 * 60 * 1000, // 3 minutes
-		enabled: !!searchSpaceId && !debouncedSearch.trim(),
-	});
-
-	// Use query for searching documents
+	// API search query (only enabled when searching - Electric doesn't do full-text search)
 	const {
 		data: searchResponse,
 		isLoading: isSearchLoading,
@@ -91,73 +77,59 @@ export default function DocumentsTable() {
 	} = useQuery({
 		queryKey: cacheKeys.documents.globalQueryParams(searchQueryParams),
 		queryFn: () => documentsApiService.searchDocuments({ queryParams: searchQueryParams }),
-		staleTime: 3 * 60 * 1000, // 3 minutes
-		enabled: !!searchSpaceId && !!debouncedSearch.trim(),
+		staleTime: 30 * 1000, // 30 seconds for search (shorter since it's on-demand)
+		enabled: !!searchSpaceId && isSearchMode,
 	});
 
-	// Determine if we should show SurfSense docs (when no type filter or SURFSENSE_DOCS is selected)
-	const showSurfsenseDocs =
-		activeTypes.length === 0 || activeTypes.includes("SURFSENSE_DOCS" as DocumentTypeEnum);
+	// Client-side sorting for real-time documents
+	const sortedRealtimeDocuments = useMemo(() => {
+		const docs = [...realtimeDocuments];
+		docs.sort((a, b) => {
+			const av = a[sortKey] ?? "";
+			const bv = b[sortKey] ?? "";
+			let cmp: number;
+			if (sortKey === "created_at") {
+				cmp = new Date(av as string).getTime() - new Date(bv as string).getTime();
+			} else {
+				cmp = String(av).localeCompare(String(bv));
+			}
+			return sortDesc ? -cmp : cmp;
+		});
+		return docs;
+	}, [realtimeDocuments, sortKey, sortDesc]);
 
-	// Use query for fetching SurfSense docs
-	// eslint-disable-next-line @typescript-eslint/no-unused-vars
-	const { data: surfsenseDocsResponse } = useQuery({
-		queryKey: ["surfsense-docs", debouncedSearch, pageIndex, PAGE_SIZE],
-		queryFn: () =>
-			documentsApiService.getSurfsenseDocs({
-				queryParams: {
-					page: pageIndex,
-					page_size: PAGE_SIZE,
-					title: debouncedSearch.trim() || undefined,
-				},
-			}),
-		staleTime: 3 * 60 * 1000, // 3 minutes
-		enabled: showSurfsenseDocs,
-	});
+	// Client-side pagination for real-time documents
+	const paginatedRealtimeDocuments = useMemo(() => {
+		const start = pageIndex * PAGE_SIZE;
+		const end = start + PAGE_SIZE;
+		return sortedRealtimeDocuments.slice(start, end);
+	}, [sortedRealtimeDocuments, pageIndex]);
 
-	// Transform SurfSense docs to match the Document type
-	// eslint-disable-next-line @typescript-eslint/no-unused-vars
-	const surfsenseDocsAsDocuments = useMemo(() => {
-		if (!surfsenseDocsResponse?.items) return [];
-		return surfsenseDocsResponse.items.map((doc) => ({
-			id: doc.id,
-			title: doc.title,
-			document_type: "SURFSENSE_DOCS",
-			document_metadata: { source: doc.source },
-			content: doc.content,
-			created_at: new Date().toISOString(),
-			search_space_id: -1, // Special value for global docs
-		}));
-	}, [surfsenseDocsResponse]);
+	// Determine what to display based on search mode
+	const displayDocs = isSearchMode
+		? (searchResponse?.items || []).map((item) => ({
+				id: item.id,
+				search_space_id: item.search_space_id,
+				document_type: item.document_type,
+				title: item.title,
+				created_by_id: item.created_by_id ?? null,
+				created_by_name: item.created_by_name ?? null,
+				created_at: item.created_at,
+			}))
+		: paginatedRealtimeDocuments;
 
-	// Merge type counts with SURFSENSE_DOCS count
-	// eslint-disable-next-line @typescript-eslint/no-unused-vars
-	const typeCounts = useMemo(() => {
-		const counts = { ...(rawTypeCounts || {}) };
-		if (surfsenseDocsResponse?.total) {
-			counts.SURFSENSE_DOCS = surfsenseDocsResponse.total;
-		}
-		return counts;
-	}, [rawTypeCounts, surfsenseDocsResponse?.total]);
+	const displayTotal = isSearchMode
+		? searchResponse?.total || 0
+		: sortedRealtimeDocuments.length;
 
-	// Extract documents and total based on search state
-	const documents = debouncedSearch.trim()
-		? searchResponse?.items || []
-		: documentsResponse?.items || [];
-	const total = debouncedSearch.trim() ? searchResponse?.total || 0 : documentsResponse?.total || 0;
+	const loading = isSearchMode ? isSearchLoading : realtimeLoading;
+	const error = isSearchMode ? searchError : realtimeError;
 
-	const loading = debouncedSearch.trim() ? isSearchLoading : isDocumentsLoading;
-	const error = debouncedSearch.trim() ? searchError : documentsError;
-
-	// Display results directly
-	const displayDocs = documents;
-	const displayTotal = total;
 	const pageEnd = Math.min((pageIndex + 1) * PAGE_SIZE, displayTotal);
 
 	const onToggleType = (type: DocumentTypeEnum, checked: boolean) => {
 		setActiveTypes((prev) => {
 			if (checked) {
-				// Only add if not already in the array
 				return prev.includes(type) ? prev : [...prev, type];
 			} else {
 				return prev.filter((t) => t !== type);
@@ -176,16 +148,15 @@ export default function DocumentsTable() {
 		if (isRefreshing) return;
 		setIsRefreshing(true);
 		try {
-			if (debouncedSearch.trim()) {
+			if (isSearchMode) {
 				await refetchSearch();
-			} else {
-				await refetchDocuments();
 			}
+			// Real-time view doesn't need manual refresh - Electric handles it
 			toast.success(t("refresh_success") || "Documents refreshed");
 		} finally {
 			setIsRefreshing(false);
 		}
-	}, [debouncedSearch, refetchSearch, refetchDocuments, t, isRefreshing]);
+	}, [isSearchMode, refetchSearch, t, isRefreshing]);
 
 	const onBulkDelete = async () => {
 		if (selectedIds.size === 0) {
@@ -208,7 +179,13 @@ export default function DocumentsTable() {
 			if (okCount === selectedIds.size)
 				toast.success(t("delete_success_count", { count: okCount }));
 			else toast.error(t("delete_partial_failed"));
-			// Note: No need to call refreshCurrentView() - the mutation already updates the cache
+			
+			// If in search mode, refetch search results to reflect deletion
+			if (isSearchMode) {
+				await refetchSearch();
+			}
+			// Real-time mode: Electric will sync the deletion automatically
+			
 			setSelectedIds(new Set());
 		} catch (e) {
 			console.error(e);
@@ -227,6 +204,12 @@ export default function DocumentsTable() {
 		});
 	}, []);
 
+	// Reset page when search changes (type filter already resets via onToggleType)
+	// biome-ignore lint/correctness/useExhaustiveDependencies: Intentionally reset page on search change
+	useEffect(() => {
+		setPageIndex(0);
+	}, [debouncedSearch]);
+
 	useEffect(() => {
 		const mq = window.matchMedia("(max-width: 768px)");
 		const apply = (isSmall: boolean) => {
@@ -245,9 +228,9 @@ export default function DocumentsTable() {
 			transition={{ duration: 0.3 }}
 			className="w-full max-w-7xl mx-auto px-6 pt-17 pb-6 space-y-6 min-h-[calc(100vh-64px)]"
 		>
-			{/* Filters */}
+			{/* Filters - use real-time type counts */}
 			<DocumentsFilters
-				typeCounts={rawTypeCounts ?? {}}
+				typeCounts={realtimeTypeCounts}
 				selectedIds={selectedIds}
 				onSearch={setSearch}
 				searchValue={search}
diff --git a/surfsense_web/atoms/documents/document-mutation.atoms.ts b/surfsense_web/atoms/documents/document-mutation.atoms.ts
index ce077cd4a..8089bacd4 100644
--- a/surfsense_web/atoms/documents/document-mutation.atoms.ts
+++ b/surfsense_web/atoms/documents/document-mutation.atoms.ts
@@ -48,7 +48,7 @@ export const uploadDocumentMutationAtom = atomWithMutation((get) => {
 		},
 
 		onSuccess: () => {
-			toast.success("Files uploaded for processing");
+			// Note: Toast notification is handled by the caller (DocumentUploadTab) to use i18n
 			// Invalidate logs summary to show new processing tasks immediately on documents page
 			queryClient.invalidateQueries({
 				queryKey: cacheKeys.logs.summary(searchSpaceId ?? undefined),
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index abb32dde1..ec8399198 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -19,7 +19,7 @@ import { Spinner } from "@/components/ui/spinner";
 import { Tabs, TabsContent } from "@/components/ui/tabs";
 import type { SearchSourceConnector } from "@/contracts/types/connector.types";
 import { useConnectorsElectric } from "@/hooks/use-connectors-electric";
-import { useDocumentsElectric } from "@/hooks/use-documents-electric";
+import { useDocuments } from "@/hooks/use-documents";
 import { useInbox } from "@/hooks/use-inbox";
 import { cn } from "@/lib/utils";
 import { ConnectorDialogHeader } from "./connector-popup/components/connector-dialog-header";
@@ -63,7 +63,9 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
 	const llmConfigLoading = preferencesLoading || globalConfigsLoading;
 
 	// Fetch document type counts using Electric SQL + PGlite for real-time updates
-	const { documentTypeCounts, loading: documentTypesLoading } = useDocumentsElectric(searchSpaceId);
+	const { typeCounts: documentTypeCounts, loading: documentTypesLoading } = useDocuments(
+		searchSpaceId ? Number(searchSpaceId) : null
+	);
 
 	// Fetch notifications to detect indexing failures
 	const { inboxItems = [] } = useInbox(
diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts
index c181119d3..b7a2d2cf8 100644
--- a/surfsense_web/contracts/types/document.types.ts
+++ b/surfsense_web/contracts/types/document.types.ts
@@ -42,6 +42,8 @@ export const document = z.object({
 	created_at: z.string(),
 	updated_at: z.string().nullable(),
 	search_space_id: z.number(),
+	created_by_id: z.string().nullable().optional(),
+	created_by_name: z.string().nullable().optional(),
 });
 
 export const extensionDocumentContent = z.object({
diff --git a/surfsense_web/hooks/use-documents-electric.ts b/surfsense_web/hooks/use-documents-electric.ts
deleted file mode 100644
index 43809499e..000000000
--- a/surfsense_web/hooks/use-documents-electric.ts
+++ /dev/null
@@ -1,185 +0,0 @@
-"use client";
-
-import { useEffect, useMemo, useRef, useState } from "react";
-import type { SyncHandle } from "@/lib/electric/client";
-import { useElectricClient } from "@/lib/electric/context";
-
-interface Document {
-	id: number;
-	search_space_id: number;
-	document_type: string;
-	created_at: string;
-}
-
-/**
- * Hook for managing documents with Electric SQL real-time sync
- *
- * Uses the Electric client from context (provided by ElectricProvider)
- * instead of initializing its own - prevents race conditions and memory leaks
- */
-export function useDocumentsElectric(searchSpaceId: number | string | null) {
-	// Get Electric client from context - ElectricProvider handles initialization
-	const electricClient = useElectricClient();
-
-	const [documents, setDocuments] = useState<Document[]>([]);
-	const [loading, setLoading] = useState(true);
-	const [error, setError] = useState<Error | null>(null);
-	const syncHandleRef = useRef<SyncHandle | null>(null);
-	const liveQueryRef = useRef<{ unsubscribe: () => void } | null>(null);
-	const syncKeyRef = useRef<string | null>(null);
-
-	// Calculate document type counts from synced documents
-	const documentTypeCounts = useMemo(() => {
-		if (!documents.length) return {};
-
-		const counts: Record<string, number> = {};
-		for (const doc of documents) {
-			counts[doc.document_type] = (counts[doc.document_type] || 0) + 1;
-		}
-		return counts;
-	}, [documents]);
-
-	// Start syncing when Electric client is available
-	useEffect(() => {
-		// Wait for both searchSpaceId and Electric client to be available
-		if (!searchSpaceId || !electricClient) {
-			setLoading(!electricClient); // Still loading if waiting for Electric
-			if (!searchSpaceId) {
-				setDocuments([]);
-			}
-			return;
-		}
-
-		// Create a unique key for this sync to prevent duplicate subscriptions
-		const syncKey = `documents_${searchSpaceId}`;
-		if (syncKeyRef.current === syncKey) {
-			// Already syncing for this search space
-			return;
-		}
-
-		let mounted = true;
-		syncKeyRef.current = syncKey;
-
-		async function startSync() {
-			try {
-				console.log("[useDocumentsElectric] Starting sync for search space:", searchSpaceId);
-
-				const handle = await electricClient.syncShape({
-					table: "documents",
-					where: `search_space_id = ${searchSpaceId}`,
-					columns: ["id", "document_type", "search_space_id", "created_at"],
-					primaryKey: ["id"],
-				});
-
-				console.log("[useDocumentsElectric] Sync started:", {
-					isUpToDate: handle.isUpToDate,
-				});
-
-				// Wait for initial sync with timeout
-				if (!handle.isUpToDate && handle.initialSyncPromise) {
-					try {
-						await Promise.race([
-							handle.initialSyncPromise,
-							new Promise((resolve) => setTimeout(resolve, 2000)),
-						]);
-					} catch (syncErr) {
-						console.error("[useDocumentsElectric] Initial sync failed:", syncErr);
-					}
-				}
-
-				if (!mounted) {
-					handle.unsubscribe();
-					return;
-				}
-
-				syncHandleRef.current = handle;
-				setLoading(false);
-				setError(null);
-
-				// Fetch initial documents
-				await fetchDocuments();
-
-				// Set up live query for real-time updates
-				await setupLiveQuery();
-			} catch (err) {
-				if (!mounted) return;
-				console.error("[useDocumentsElectric] Failed to start sync:", err);
-				setError(err instanceof Error ? err : new Error("Failed to sync documents"));
-				setLoading(false);
-			}
-		}
-
-		async function fetchDocuments() {
-			try {
-				const result = await electricClient.db.query<Document>(
-					`SELECT id, document_type, search_space_id, created_at FROM documents WHERE search_space_id = $1 ORDER BY created_at DESC`,
-					[searchSpaceId]
-				);
-				if (mounted) {
-					setDocuments(result.rows || []);
-				}
-			} catch (err) {
-				console.error("[useDocumentsElectric] Failed to fetch:", err);
-			}
-		}
-
-		async function setupLiveQuery() {
-			try {
-				// eslint-disable-next-line @typescript-eslint/no-explicit-any
-				const db = electricClient.db as any;
-
-				if (db.live?.query && typeof db.live.query === "function") {
-					const liveQuery = await db.live.query(
-						`SELECT id, document_type, search_space_id, created_at FROM documents WHERE search_space_id = $1 ORDER BY created_at DESC`,
-						[searchSpaceId]
-					);
-
-					if (!mounted) {
-						liveQuery.unsubscribe?.();
-						return;
-					}
-
-					// Set initial results
-					if (liveQuery.initialResults?.rows) {
-						setDocuments(liveQuery.initialResults.rows);
-					} else if (liveQuery.rows) {
-						setDocuments(liveQuery.rows);
-					}
-
-					// Subscribe to changes
-					if (typeof liveQuery.subscribe === "function") {
-						liveQuery.subscribe((result: { rows: Document[] }) => {
-							if (mounted && result.rows) {
-								setDocuments(result.rows);
-							}
-						});
-					}
-
-					if (typeof liveQuery.unsubscribe === "function") {
-						liveQueryRef.current = liveQuery;
-					}
-				}
-			} catch (liveErr) {
-				console.error("[useDocumentsElectric] Failed to set up live query:", liveErr);
-			}
-		}
-
-		startSync();
-
-		return () => {
-			mounted = false;
-			syncKeyRef.current = null;
-
-			if (syncHandleRef.current) {
-				syncHandleRef.current.unsubscribe();
-				syncHandleRef.current = null;
-			}
-			if (liveQueryRef.current) {
-				liveQueryRef.current.unsubscribe();
-				liveQueryRef.current = null;
-			}
-		};
-	}, [searchSpaceId, electricClient]);
-
-	return { documentTypeCounts, loading, error };
-}
diff --git a/surfsense_web/hooks/use-documents.ts b/surfsense_web/hooks/use-documents.ts
new file mode 100644
index 000000000..4d1f8f67c
--- /dev/null
+++ b/surfsense_web/hooks/use-documents.ts
@@ -0,0 +1,427 @@
+"use client";
+
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+import type { DocumentTypeEnum } from "@/contracts/types/document.types";
+import { documentsApiService } from "@/lib/apis/documents-api.service";
+import type { SyncHandle } from "@/lib/electric/client";
+import { useElectricClient } from "@/lib/electric/context";
+
+const PAGE_SIZE = 100;
+
+// Stable empty array to prevent infinite re-renders when no typeFilter is provided
+const EMPTY_TYPE_FILTER: DocumentTypeEnum[] = [];
+
+// Document from Electric sync (lightweight table columns - NO content/metadata)
+interface DocumentElectric {
+	id: number;
+	search_space_id: number;
+	document_type: string;
+	title: string;
+	created_by_id: string | null;
+	created_at: string;
+}
+
+// Document for display (with resolved user name)
+export interface DocumentDisplay {
+	id: number;
+	search_space_id: number;
+	document_type: string;
+	title: string;
+	created_by_id: string | null;
+	created_by_name: string | null;
+	created_at: string;
+}
+
+/**
+ * Collapse entries that share an `id` (a strictly later `created_at` wins)
+ * and return the survivors ordered newest-first by `created_at`.
+ */
+function deduplicateAndSort<T extends { id: number; created_at: string }>(items: T[]): T[] {
+	const toMillis = (entry: T) => new Date(entry.created_at).getTime();
+	const byId = new Map<number, T>();
+	items.forEach((candidate) => {
+		const kept = byId.get(candidate.id);
+		// Ties (and unparsable dates) keep whichever entry was seen first
+		if (kept === undefined || toMillis(candidate) > toMillis(kept)) {
+			byId.set(candidate.id, candidate);
+		}
+	});
+	return Array.from(byId.values()).sort((left, right) => toMillis(right) - toMillis(left));
+}
+
+/**
+ * Whether a row from Electric carries both an id and a non-empty title
+ */
+function isValidDocument(doc: DocumentElectric): boolean {
+	return !(doc.id == null || doc.title == null || doc.title === "");
+}
+
+/**
+ * Real-time documents hook with Electric SQL
+ *
+ * Architecture:
+ * 1. The API loads the first page (PAGE_SIZE documents) and seeds the list
+ * 2. Electric live-query results are ignored until that API load has completed
+ * 3. Once syncHandle.isUpToDate, the live result replaces the list (covers deletions)
+ * 4. Before that, live results only add or update documents and never remove any
+ *
+ * @param searchSpaceId - The search space ID to filter documents
+ * @param typeFilter - Optional document types to filter by
+ */
+export function useDocuments(
+	searchSpaceId: number | null,
+	typeFilter: DocumentTypeEnum[] = EMPTY_TYPE_FILTER
+) {
+	const electricClient = useElectricClient();
+
+	const [documents, setDocuments] = useState<DocumentDisplay[]>([]);
+	const [loading, setLoading] = useState(true);
+	const [error, setError] = useState<Error | null>(null);
+
+	// Track if initial API load is complete (source of truth)
+	const apiLoadedRef = useRef(false);
+
+	// User cache: userId → displayName
+	const userCacheRef = useRef<Map<string, string>>(new Map());
+
+	// Electric sync refs
+	const syncHandleRef = useRef<SyncHandle | null>(null);
+	const liveQueryRef = useRef<{ unsubscribe?: () => void } | null>(null);
+
+	// Real-time type counts
+	const typeCounts = useMemo(() => {
+		const counts: Record<string, number> = {};
+		for (const doc of documents) {
+			counts[doc.document_type] = (counts[doc.document_type] || 0) + 1;
+		}
+		return counts;
+	}, [documents]);
+
+	// Populate user cache from API response
+	const populateUserCache = useCallback(
+		(items: Array<{ created_by_id?: string | null; created_by_name?: string | null }>) => {
+			for (const item of items) {
+				if (item.created_by_id && item.created_by_name) {
+					userCacheRef.current.set(item.created_by_id, item.created_by_name);
+				}
+			}
+		},
+		[]
+	);
+
+	// Convert API item to display doc
+	const apiToDisplayDoc = useCallback(
+		(item: {
+			id: number;
+			search_space_id: number;
+			document_type: string;
+			title: string;
+			created_by_id?: string | null;
+			created_by_name?: string | null;
+			created_at: string;
+		}): DocumentDisplay => ({
+			id: item.id,
+			search_space_id: item.search_space_id,
+			document_type: item.document_type,
+			title: item.title,
+			created_by_id: item.created_by_id ?? null,
+			created_by_name: item.created_by_name ?? null,
+			created_at: item.created_at,
+		}),
+		[]
+	);
+
+	// Convert Electric doc to display doc
+	const electricToDisplayDoc = useCallback(
+		(doc: DocumentElectric): DocumentDisplay => ({
+			...doc,
+			created_by_name: doc.created_by_id
+				? userCacheRef.current.get(doc.created_by_id) ?? null
+				: null,
+		}),
+		[]
+	);
+
+	// EFFECT 1: Load from API (PRIMARY source of truth)
+	useEffect(() => {
+		if (!searchSpaceId) {
+			setLoading(false);
+			return;
+		}
+
+		// Capture validated value for async closure
+		const spaceId = searchSpaceId;
+		const currentTypeFilter = typeFilter;
+
+		let mounted = true;
+		apiLoadedRef.current = false;
+
+		async function loadFromApi() {
+			try {
+				setLoading(true);
+				console.log("[useDocuments] Loading from API (source of truth):", spaceId);
+
+				const response = await documentsApiService.getDocuments({
+					queryParams: {
+						search_space_id: spaceId,
+						page: 0,
+						page_size: PAGE_SIZE,
+						...(currentTypeFilter.length > 0 && { document_types: currentTypeFilter }),
+					},
+				});
+
+				if (!mounted) return;
+
+				populateUserCache(response.items);
+				const docs = response.items.map(apiToDisplayDoc);
+				setDocuments(docs);
+				apiLoadedRef.current = true;
+				setError(null);
+				console.log("[useDocuments] API loaded", docs.length, "documents");
+			} catch (err) {
+				if (!mounted) return;
+				console.error("[useDocuments] API load failed:", err);
+				setError(err instanceof Error ? err : new Error("Failed to load documents"));
+			} finally {
+				if (mounted) setLoading(false);
+			}
+		}
+
+		loadFromApi();
+
+		return () => {
+			mounted = false;
+		};
+	}, [searchSpaceId, typeFilter, populateUserCache, apiToDisplayDoc]);
+
+	// EFFECT 2: Start Electric sync + live query for real-time updates
+	useEffect(() => {
+		if (!searchSpaceId || !electricClient) return;
+
+		// Capture validated values for async closure
+		const spaceId = searchSpaceId;
+		const client = electricClient;
+		const currentTypeFilter = typeFilter;
+
+		let mounted = true;
+
+		async function setupElectricRealtime() {
+			// Cleanup previous subscriptions
+			if (syncHandleRef.current) {
+				syncHandleRef.current.unsubscribe();
+				syncHandleRef.current = null;
+			}
+			if (liveQueryRef.current) {
+				liveQueryRef.current.unsubscribe?.();
+				liveQueryRef.current = null;
+			}
+
+			try {
+				console.log("[useDocuments] Starting Electric sync for real-time updates");
+
+				// Start Electric sync
+				const handle = await client.syncShape({
+					table: "documents",
+					where: `search_space_id = ${spaceId}`,
+					columns: ["id", "document_type", "search_space_id", "title", "created_by_id", "created_at"],
+					primaryKey: ["id"],
+				});
+
+				if (!mounted) {
+					handle.unsubscribe();
+					return;
+				}
+
+				syncHandleRef.current = handle;
+				console.log("[useDocuments] Sync started, isUpToDate:", handle.isUpToDate);
+
+				// Wait for initial sync (with timeout)
+				if (!handle.isUpToDate && handle.initialSyncPromise) {
+					await Promise.race([
+						handle.initialSyncPromise,
+						new Promise((resolve) => setTimeout(resolve, 5000)),
+					]);
+					console.log("[useDocuments] Initial sync complete, isUpToDate:", handle.isUpToDate);
+				}
+
+				if (!mounted) return;
+
+				// Set up live query
+				const db = client.db as {
+					live?: {
+						query: <T>(sql: string, params?: (number | string)[]) => Promise<{
+							subscribe: (cb: (result: { rows: T[] }) => void) => void;
+							unsubscribe?: () => void;
+						}>;
+					};
+				};
+
+				if (!db.live?.query) {
+					console.warn("[useDocuments] Live queries not available");
+					return;
+				}
+
+				let query = `SELECT id, document_type, search_space_id, title, created_by_id, created_at
+					FROM documents 
+					WHERE search_space_id = $1`;
+
+				const params: (number | string)[] = [spaceId];
+
+				if (currentTypeFilter.length > 0) {
+					const placeholders = currentTypeFilter.map((_, i) => `$${i + 2}`).join(", ");
+					query += ` AND document_type IN (${placeholders})`;
+					params.push(...currentTypeFilter);
+				}
+
+				query += ` ORDER BY created_at DESC`;
+
+				const liveQuery = await db.live.query<DocumentElectric>(query, params);
+
+				if (!mounted) {
+					liveQuery.unsubscribe?.();
+					return;
+				}
+
+				console.log("[useDocuments] Live query subscribed");
+
+				liveQuery.subscribe((result: { rows: DocumentElectric[] }) => {
+					if (!mounted || !result.rows) return;
+
+					// DEBUG: Log first few raw documents to see what's coming from Electric
+					console.log("[useDocuments] Raw data sample:", result.rows.slice(0, 3));
+
+					const validItems = result.rows.filter(isValidDocument);
+					const isFullySynced = syncHandleRef.current?.isUpToDate ?? false;
+
+					console.log(
+						`[useDocuments] Live update: ${result.rows.length} raw, ${validItems.length} valid, synced: ${isFullySynced}`
+					);
+
+					// Resolve creator names not cached yet (non-blocking; rows carry only created_by_id)
+					const hasUnknownUsers = validItems.some(
+						(doc) => doc.created_by_id !== null && !userCacheRef.current.has(doc.created_by_id)
+					);
+
+					if (hasUnknownUsers) {
+						documentsApiService
+							.getDocuments({
+								queryParams: { search_space_id: spaceId, page: 0, page_size: 20 },
+							})
+							.then((response) => {
+								populateUserCache(response.items);
+								if (mounted) {
+									setDocuments((prev) =>
+										prev.map((doc) => ({
+											...doc,
+											created_by_name: doc.created_by_id
+												? userCacheRef.current.get(doc.created_by_id) ?? null
+												: null,
+										}))
+									);
+								}
+							})
+							.catch((err) => {
+								// Best-effort lookup: names stay null, but leave a trace for debugging
+								console.warn("[useDocuments] Failed to resolve creator names:", err);
+							});
+					}
+
+					// Smart update logic based on sync state
+					setDocuments((prev) => {
+						// Don't process if API hasn't loaded yet
+						if (!apiLoadedRef.current) {
+							console.log("[useDocuments] Waiting for API load, skipping live update");
+							return prev;
+						}
+
+						// Case 1: Live query is empty
+						if (validItems.length === 0) {
+							if (isFullySynced && prev.length > 0) {
+								// Electric is fully synced and says 0 items - trust it (all deleted)
+								console.log("[useDocuments] All documents deleted (Electric synced)");
+								return [];
+							}
+							// Partial sync or error - keep existing
+							console.log("[useDocuments] Empty live result, keeping existing");
+							return prev;
+						}
+
+						// Case 2: Electric is fully synced - TRUST IT COMPLETELY (handles bulk deletes)
+						if (isFullySynced) {
+							const liveDocs = deduplicateAndSort(validItems.map(electricToDisplayDoc));
+							console.log(`[useDocuments] Synced update: ${liveDocs.length} docs (was ${prev.length})`);
+							return liveDocs;
+						}
+
+						// Case 3: Partial sync - only ADD new items, don't remove any
+						const existingIds = new Set(prev.map((d) => d.id));
+						// Index live rows by id once so the per-document lookup below is O(1)
+						const liveById = new Map<number, DocumentElectric>();
+						for (const item of validItems) {
+							// First row wins for a repeated id, matching the previous Array.find lookup
+							if (!liveById.has(item.id)) liveById.set(item.id, item);
+						}
+
+						// Find new items (in live but not in prev)
+						const newItems = validItems
+							.filter((item) => !existingIds.has(item.id))
+							.map(electricToDisplayDoc);
+
+						// Find updated items (in both, update with latest data)
+						const updatedPrev = prev.map((doc) => {
+							const liveItem = liveById.get(doc.id);
+							return liveItem ? electricToDisplayDoc(liveItem) : doc;
+						});
+
+						if (newItems.length > 0) {
+							console.log(`[useDocuments] Adding ${newItems.length} new items (partial sync)`);
+							return deduplicateAndSort([...newItems, ...updatedPrev]);
+						}
+
+						return updatedPrev;
+					});
+				});
+
+				liveQueryRef.current = liveQuery;
+			} catch (err) {
+				console.error("[useDocuments] Electric setup failed:", err);
+				// Don't set error - API data is already loaded
+			}
+		}
+
+		setupElectricRealtime();
+
+		return () => {
+			mounted = false;
+			if (syncHandleRef.current) {
+				syncHandleRef.current.unsubscribe();
+				syncHandleRef.current = null;
+			}
+			if (liveQueryRef.current) {
+				liveQueryRef.current.unsubscribe?.();
+				liveQueryRef.current = null;
+			}
+		};
+	}, [searchSpaceId, electricClient, typeFilter, electricToDisplayDoc, populateUserCache]);
+
+	// Track previous searchSpaceId to detect actual changes
+	const prevSearchSpaceIdRef = useRef<number | null>(null);
+
+	// Reset on search space change (not on initial mount)
+	useEffect(() => {
+		if (prevSearchSpaceIdRef.current !== null && prevSearchSpaceIdRef.current !== searchSpaceId) {
+			setDocuments([]);
+			apiLoadedRef.current = false;
+			userCacheRef.current.clear();
+		}
+		prevSearchSpaceIdRef.current = searchSpaceId;
+	}, [searchSpaceId]);
+
+	return {
+		documents,
+		typeCounts,
+		total: documents.length,
+		loading,
+		error,
+	};
+}
diff --git a/surfsense_web/lib/electric/client.ts b/surfsense_web/lib/electric/client.ts
index 7ef8f7bbf..788a9444d 100644
--- a/surfsense_web/lib/electric/client.ts
+++ b/surfsense_web/lib/electric/client.ts
@@ -70,7 +70,9 @@ const pendingSyncs = new Map<string, Promise<SyncHandle>>();
 // v5: fixed duplicate key errors (root cause: unstable cutoff dates in use-inbox.ts)
 //     - added onMustRefetch handler for server-side refetch scenarios
 //     - fixed getSyncCutoffDate to use stable midnight UTC timestamps
-const SYNC_VERSION = 5;
+// v6: real-time documents table - added title and created_by_id columns for live document display
+// v7: removed use-documents-electric.ts - consolidated to single documents sync to prevent conflicts
+const SYNC_VERSION = 7;
 
 // Database name prefix for identifying SurfSense databases
 const DB_PREFIX = "surfsense-";
@@ -235,12 +237,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 			`);
 
 			// Create the documents table schema in PGlite
-			// Only sync minimal fields needed for type counts: id, document_type, search_space_id
+			// Sync columns needed for real-time table display (lightweight - no content/metadata)
 			await db.exec(`
 				CREATE TABLE IF NOT EXISTS documents (
 					id INTEGER PRIMARY KEY,
 					search_space_id INTEGER NOT NULL,
 					document_type TEXT NOT NULL,
+					title TEXT NOT NULL DEFAULT '',
+					created_by_id TEXT,
 					created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
 				);
 				

From c706b5f417c31562ee41b722668b44cc06a41e0b Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 22:34:03 +0530
Subject: [PATCH 11/36] feat: enhance metadata viewing in DocumentsTableShell
 with on-demand fetching and loading state

---
 .../components/DocumentsTableShell.tsx        | 47 ++++++++++++++++---
 .../components/json-metadata-viewer.tsx       | 12 ++++-
 2 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index dd32a3b78..1be226a56 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -158,7 +158,10 @@ export function DocumentsTableShell({
 	const { openDialog } = useDocumentUploadDialog();
 
 	// State for metadata viewer (opened via Ctrl/Cmd+Click)
+	// Real-time documents don't sync metadata - we fetch on-demand when viewing
 	const [metadataDoc, setMetadataDoc] = useState<Document | null>(null);
+	const [metadataContent, setMetadataContent] = useState<any>(null);
+	const [metadataLoading, setMetadataLoading] = useState(false);
 
 	// State for lazy document content viewer
 	// Real-time documents don't sync content - we fetch on-demand when viewing
@@ -166,6 +169,36 @@ export function DocumentsTableShell({
 	const [viewingContent, setViewingContent] = useState<string>("");
 	const [viewingLoading, setViewingLoading] = useState(false);
 
+	// Fetch document metadata on-demand when metadata viewer is opened
+	const handleViewMetadata = useCallback(async (doc: Document) => {
+		setMetadataDoc(doc);
+
+		// If metadata is already available (from API/search), use it directly
+		if (doc.document_metadata) {
+			setMetadataContent(doc.document_metadata);
+			return;
+		}
+
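+		// Otherwise lazy-load from the API (real-time synced documents carry no metadata). NOTE(review): a late response is still applied if the viewer was closed or reopened for another document meanwhile - confirm this is acceptable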
+		setMetadataLoading(true);
+		try {
+			const fullDoc = await documentsApiService.getDocument({ id: doc.id });
+			setMetadataContent(fullDoc.document_metadata);
+		} catch (err) {
+			console.error("[DocumentsTableShell] Failed to fetch document metadata:", err);
+			setMetadataContent(null);
+		} finally {
+			setMetadataLoading(false);
+		}
+	}, []);
+
+	// Close metadata viewer and reset the on-demand metadata content and loading state
+	const handleCloseMetadata = useCallback(() => {
+		setMetadataDoc(null);
+		setMetadataContent(null);
+		setMetadataLoading(false);
+	}, []);
+
 	// Fetch document content on-demand when viewer is opened
 	const handleViewDocument = useCallback(async (doc: Document) => {
 		setViewingDoc(doc);
@@ -474,7 +507,7 @@ export function DocumentsTableShell({
 															if (e.ctrlKey || e.metaKey) {
 																e.preventDefault();
 																e.stopPropagation();
-																setMetadataDoc(doc);
+																handleViewMetadata(doc);
 															} else {
 																// Normal click opens document viewer (lazy loads content)
 																handleViewDocument(doc);
@@ -484,7 +517,7 @@ export function DocumentsTableShell({
 															// Ctrl/Cmd + Enter opens metadata
 															if ((e.ctrlKey || e.metaKey) && e.key === "Enter") {
 																e.preventDefault();
-																setMetadataDoc(doc);
+																handleViewMetadata(doc);
 															} else if (e.key === "Enter") {
 																// Enter opens document viewer
 																handleViewDocument(doc);
@@ -553,7 +586,7 @@ export function DocumentsTableShell({
 													if (e.ctrlKey || e.metaKey) {
 														e.preventDefault();
 														e.stopPropagation();
-														setMetadataDoc(doc);
+														handleViewMetadata(doc);
 													} else {
 														// Normal click opens document viewer (lazy loads content)
 														handleViewDocument(doc);
@@ -563,7 +596,7 @@ export function DocumentsTableShell({
 													// Ctrl/Cmd + Enter opens metadata
 													if ((e.ctrlKey || e.metaKey) && e.key === "Enter") {
 														e.preventDefault();
-														setMetadataDoc(doc);
+														handleViewMetadata(doc);
 													} else if (e.key === "Enter") {
 														// Enter opens document viewer
 														handleViewDocument(doc);
@@ -602,12 +635,14 @@ export function DocumentsTableShell({
 			)}
 
 			{/* Metadata Viewer - opened via Ctrl/Cmd+Click on document title */}
+			{/* Lazily loads metadata from the API for real-time synced documents */}
 			<JsonMetadataViewer
 				title={metadataDoc?.title ?? ""}
-				metadata={metadataDoc?.document_metadata}
+				metadata={metadataContent}
+				loading={metadataLoading}
 				open={!!metadataDoc}
 				onOpenChange={(open) => {
-					if (!open) setMetadataDoc(null);
+					if (!open) handleCloseMetadata();
 				}}
 			/>
 
diff --git a/surfsense_web/components/json-metadata-viewer.tsx b/surfsense_web/components/json-metadata-viewer.tsx
index 982d16786..faab000ad 100644
--- a/surfsense_web/components/json-metadata-viewer.tsx
+++ b/surfsense_web/components/json-metadata-viewer.tsx
@@ -1,4 +1,4 @@
-import { FileJson } from "lucide-react";
+import { FileJson, Loader2 } from "lucide-react";
 import React from "react";
 import { defaultStyles, JsonView } from "react-json-view-lite";
 import { Button } from "@/components/ui/button";
@@ -17,6 +17,7 @@ interface JsonMetadataViewerProps {
 	trigger?: React.ReactNode;
 	open?: boolean;
 	onOpenChange?: (open: boolean) => void;
+	loading?: boolean;
 }
 
 export function JsonMetadataViewer({
@@ -25,6 +26,7 @@ export function JsonMetadataViewer({
 	trigger,
 	open,
 	onOpenChange,
+	loading,
 }: JsonMetadataViewerProps) {
 	// Ensure metadata is a valid object
 	const jsonData = React.useMemo(() => {
@@ -54,7 +56,13 @@ export function JsonMetadataViewer({
 						</DialogTitle>
 					</DialogHeader>
 					<div className="mt-2 sm:mt-4 p-2 sm:p-4 bg-muted/30 rounded-md text-xs sm:text-sm">
-						<JsonView data={jsonData} style={defaultStyles} />
+						{loading ? (
+							<div className="flex items-center justify-center py-12">
+								<Loader2 className="h-8 w-8 animate-spin text-muted-foreground" />
+							</div>
+						) : (
+							<JsonView data={jsonData} style={defaultStyles} />
+						)}
 					</div>
 				</DialogContent>
 			</Dialog>

From d5fd4c2863354a8f1029a6828676ce7dd2a99070 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Wed, 4 Feb 2026 23:19:45 +0530
Subject: [PATCH 12/36] refactor: change page size parameter to -1 for fetching
 all documents in useDocuments hook

---
 surfsense_web/hooks/use-documents.ts | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/surfsense_web/hooks/use-documents.ts b/surfsense_web/hooks/use-documents.ts
index 4d1f8f67c..99fb99e13 100644
--- a/surfsense_web/hooks/use-documents.ts
+++ b/surfsense_web/hooks/use-documents.ts
@@ -6,8 +6,6 @@ import { documentsApiService } from "@/lib/apis/documents-api.service";
 import type { SyncHandle } from "@/lib/electric/client";
 import { useElectricClient } from "@/lib/electric/context";
 
-const PAGE_SIZE = 100;
-
 // Stable empty array to prevent infinite re-renders when no typeFilter is provided
 const EMPTY_TYPE_FILTER: DocumentTypeEnum[] = [];
 
@@ -165,7 +163,7 @@ export function useDocuments(
 					queryParams: {
 						search_space_id: spaceId,
 						page: 0,
-						page_size: PAGE_SIZE,
+						page_size: -1, // Fetch all documents
 						...(currentTypeFilter.length > 0 && { document_types: currentTypeFilter }),
 					},
 				});

From e615a6478cc6378ed8dcc86da561e5b798167c68 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Thu, 5 Feb 2026 01:29:15 +0530
Subject: [PATCH 13/36] feat: implement document deletion functionality and
 streamline column visibility management in DocumentsTable

---
 .../(manage)/components/DocumentsFilters.tsx  | 59 +------------------
 .../components/DocumentsTableShell.tsx        | 34 +++++++++--
 .../documents/(manage)/page.tsx               | 25 ++++++--
 3 files changed, 50 insertions(+), 68 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
index 2c3dc7eef..ed882916e 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
@@ -4,7 +4,6 @@ import { useSetAtom } from "jotai";
 import {
 	CircleAlert,
 	CircleX,
-	Columns3,
 	FilePlus2,
 	FileType,
 	ListFilter,
@@ -31,11 +30,9 @@ import {
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
 import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
 import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
 import type { DocumentTypeEnum } from "@/contracts/types/document.types";
 import { getDocumentTypeIcon, getDocumentTypeLabel } from "./DocumentTypeIcon";
-import type { ColumnVisibility } from "./types";
 
 export function DocumentsFilters({
 	typeCounts: typeCountsRecord,
@@ -45,8 +42,6 @@ export function DocumentsFilters({
 	onBulkDelete,
 	onToggleType,
 	activeTypes,
-	columnVisibility,
-	onToggleColumn,
 }: {
 	typeCounts: Partial<Record<DocumentTypeEnum, number>>;
 	selectedIds: Set<number>;
@@ -55,8 +50,6 @@ export function DocumentsFilters({
 	onBulkDelete: () => Promise<void>;
 	onToggleType: (type: DocumentTypeEnum, checked: boolean) => void;
 	activeTypes: DocumentTypeEnum[];
-	columnVisibility: ColumnVisibility;
-	onToggleColumn: (id: keyof ColumnVisibility, checked: boolean) => void;
 }) {
 	const t = useTranslations("documents");
 	const id = React.useId();
@@ -252,57 +245,7 @@ export function DocumentsFilters({
 					</PopoverContent>
 				</Popover>
 
-					{/* View/Columns Popover */}
-					<Popover>
-						<PopoverTrigger asChild>
-							<Button
-								variant="outline"
-								size="sm"
-								className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
-							>
-								<Columns3 size={14} className="text-muted-foreground" />
-								<span className="hidden sm:inline">View</span>
-							</Button>
-						</PopoverTrigger>
-						<PopoverContent className="w-36 !p-0 overflow-hidden" align="end">
-							<div className="px-2.5 pt-3 pb-2">
-								<div className="mb-1.5 px-1 text-[11px] font-medium text-muted-foreground">
-									Toggle columns
-								</div>
-								<div className="space-y-0.5">
-									{(
-										[
-											["document_type", "Source"],
-											["created_by", "User"],
-											["created_at", "Created"],
-										] as Array<[keyof ColumnVisibility, string]>
-									).map(([key, label], i) => (
-										<button
-											key={key}
-											type="button"
-											className="flex w-full items-center gap-2 py-1 px-2.5 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
-											onClick={() => onToggleColumn(key, !columnVisibility[key])}
-										>
-											<Checkbox
-												id={`${id}-col-${i}`}
-												checked={columnVisibility[key]}
-												onCheckedChange={(checked: boolean) => onToggleColumn(key, !!checked)}
-												className="h-3.5 w-3.5 flex-shrink-0 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
-											/>
-											<Label
-												htmlFor={`${id}-col-${i}`}
-												className="flex flex-1 items-center gap-2 font-normal text-xs cursor-pointer min-w-0"
-											>
-												<span className="truncate min-w-0">{label}</span>
-											</Label>
-										</button>
-									))}
-								</div>
-							</div>
-						</PopoverContent>
-					</Popover>
-
-					{/* Bulk Delete Button - positioned next to View on mobile */}
+					{/* Bulk Delete Button */}
 					{selectedIds.size > 0 && (
 						<AlertDialog>
 							<AlertDialogTrigger asChild>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index 1be226a56..e79b4d104 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -28,6 +28,7 @@ import {
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { DocumentTypeChip } from "./DocumentTypeIcon";
+import { RowActions } from "./RowActions";
 import type { ColumnVisibility, Document } from "./types";
 
 export type SortKey = keyof Pick<Document, "title" | "document_type" | "created_at">;
@@ -142,6 +143,8 @@ export function DocumentsTableShell({
 	sortKey,
 	sortDesc,
 	onSortChange,
+	deleteDocument,
+	searchSpaceId,
 }: {
 	documents: Document[];
 	loading: boolean;
@@ -153,6 +156,8 @@ export function DocumentsTableShell({
 	sortKey: SortKey;
 	sortDesc: boolean;
 	onSortChange: (key: SortKey) => void;
+	deleteDocument: (id: number) => Promise<boolean>;
+	searchSpaceId: string;
 }) {
 	const t = useTranslations("documents");
 	const { openDialog } = useDocumentUploadDialog();
@@ -273,7 +278,7 @@ export function DocumentsTableShell({
 						<Table className="table-fixed w-full">
 							<TableHeader>
 								<TableRow className="hover:bg-transparent border-b border-border/40">
-									<TableHead className="w-8 px-0 text-center border-r border-border/40">
+									<TableHead className="w-8 px-0 text-center">
 										<div className="flex items-center justify-center h-full">
 											<Skeleton className="h-4 w-4 rounded" />
 										</div>
@@ -296,6 +301,9 @@ export function DocumentsTableShell({
 											<Skeleton className="h-3 w-16" />
 										</TableHead>
 									)}
+									<TableHead className="w-10">
+										<span className="sr-only">Actions</span>
+									</TableHead>
 								</TableRow>
 							</TableHeader>
 						</Table>
@@ -307,7 +315,7 @@ export function DocumentsTableShell({
 											key={`skeleton-${index}`}
 											className="border-b border-border/40 hover:bg-transparent"
 										>
-											<TableCell className="w-8 px-0 py-2.5 text-center border-r border-border/40">
+											<TableCell className="w-8 px-0 py-2.5 text-center">
 												<div className="flex items-center justify-center h-full">
 													<Skeleton className="h-4 w-4 rounded" />
 												</div>
@@ -333,6 +341,9 @@ export function DocumentsTableShell({
 													<Skeleton className="h-4 w-20" />
 												</TableCell>
 											)}
+											<TableCell className="w-10 py-2.5 text-center">
+												<Skeleton className="h-6 w-6 mx-auto rounded" />
+											</TableCell>
 										</TableRow>
 									))}
 								</TableBody>
@@ -406,7 +417,7 @@ export function DocumentsTableShell({
 						<Table className="table-fixed w-full">
 							<TableHeader>
 								<TableRow className="hover:bg-transparent border-b border-border/40">
-									<TableHead className="w-8 px-0 text-center border-r border-border/40">
+									<TableHead className="w-8 px-0 text-center">
 										<div className="flex items-center justify-center h-full">
 											<Checkbox
 												checked={allSelectedOnPage || (someSelectedOnPage && "indeterminate")}
@@ -461,6 +472,9 @@ export function DocumentsTableShell({
 											</SortableHeader>
 										</TableHead>
 									)}
+									<TableHead className="w-10">
+										<span className="sr-only">Actions</span>
+									</TableHead>
 								</TableRow>
 							</TableHeader>
 						</Table>
@@ -488,7 +502,7 @@ export function DocumentsTableShell({
 														: "hover:bg-muted/30"
 												}`}
 											>
-												<TableCell className="w-8 px-0 py-2.5 text-center border-r border-border/40">
+												<TableCell className="w-8 px-0 py-2.5 text-center">
 													<div className="flex items-center justify-center h-full">
 														<Checkbox
 															checked={isSelected}
@@ -549,6 +563,13 @@ export function DocumentsTableShell({
 														</Tooltip>
 													</TableCell>
 												)}
+												<TableCell className="w-10 py-2.5 text-center">
+													<RowActions
+														document={doc}
+														deleteDocument={deleteDocument}
+														searchSpaceId={searchSpaceId}
+													/>
+												</TableCell>
 											</motion.tr>
 										);
 									})}
@@ -626,6 +647,11 @@ export function DocumentsTableShell({
 												)}
 											</div>
 										</div>
+										<RowActions
+											document={doc}
+											deleteDocument={deleteDocument}
+											searchSpaceId={searchSpaceId}
+										/>
 									</div>
 								</motion.div>
 							);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
index 31c95e5e6..ab92d1b94 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@@ -138,10 +138,6 @@ export default function DocumentsTable() {
 		setPageIndex(0);
 	};
 
-	const onToggleColumn = (id: keyof ColumnVisibility, checked: boolean) => {
-		setColumnVisibility((prev) => ({ ...prev, [id]: checked }));
-	};
-
 	const [isRefreshing, setIsRefreshing] = useState(false);
 
 	const refreshCurrentView = useCallback(async () => {
@@ -193,6 +189,23 @@ export default function DocumentsTable() {
 		}
 	};
 
+	// Single document delete handler for RowActions
+	const handleDeleteDocument = useCallback(async (id: number): Promise<boolean> => {
+		try {
+			await deleteDocumentMutation({ id });
+			toast.success(t("delete_success") || "Document deleted");
+			// If in search mode, refetch search results to reflect deletion
+			if (isSearchMode) {
+				await refetchSearch();
+			}
+			// Real-time mode: Electric will sync the deletion automatically
+			return true;
+		} catch (e) {
+			console.error("Error deleting document:", e);
+			return false;
+		}
+	}, [deleteDocumentMutation, isSearchMode, refetchSearch, t]);
+
 	const handleSortChange = useCallback((key: SortKey) => {
 		setSortKey((currentKey) => {
 			if (currentKey === key) {
@@ -237,8 +250,6 @@ export default function DocumentsTable() {
 				onBulkDelete={onBulkDelete}
 				onToggleType={onToggleType}
 				activeTypes={activeTypes}
-				columnVisibility={columnVisibility}
-				onToggleColumn={onToggleColumn}
 			/>
 
 			{/* Table */}
@@ -253,6 +264,8 @@ export default function DocumentsTable() {
 				sortKey={sortKey}
 				sortDesc={sortDesc}
 				onSortChange={handleSortChange}
+				deleteDocument={handleDeleteDocument}
+				searchSpaceId={String(searchSpaceId)}
 			/>
 
 			{/* Pagination */}

From 04884caeef2b36d86c6b7680a43848536f224d4d Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Thu, 5 Feb 2026 02:30:20 +0530
Subject: [PATCH 14/36] refactor: simplify document title assignment across
 various connectors by removing prefix formatting

---
 .../connectors/composio_gmail_connector.py    |  4 +--
 .../composio_google_calendar_connector.py     |  4 +--
 .../composio_google_drive_connector.py        |  4 +--
 .../app/services/connector_service.py         | 28 +++++++++----------
 .../connector_indexers/airtable_indexer.py    |  6 ++--
 .../connector_indexers/bookstack_indexer.py   |  4 +--
 .../connector_indexers/clickup_indexer.py     |  4 +--
 .../connector_indexers/confluence_indexer.py  |  4 +--
 .../connector_indexers/discord_indexer.py     |  2 +-
 .../connector_indexers/github_indexer.py      |  2 +-
 .../google_calendar_indexer.py                |  4 +--
 .../google_gmail_indexer.py                   |  4 +--
 .../tasks/connector_indexers/jira_indexer.py  |  6 ++--
 .../connector_indexers/linear_indexer.py      |  6 ++--
 .../tasks/connector_indexers/luma_indexer.py  |  4 +--
 .../connector_indexers/notion_indexer.py      |  4 +--
 .../tasks/connector_indexers/slack_indexer.py |  2 +-
 .../tasks/connector_indexers/teams_indexer.py |  2 +-
 18 files changed, 44 insertions(+), 50 deletions(-)

diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py
index 1964a4d45..05395bfba 100644
--- a/surfsense_backend/app/connectors/composio_gmail_connector.py
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@@ -319,7 +319,7 @@ async def _process_gmail_message_batch(
 
                 chunks = await create_document_chunks(markdown_content)
 
-                existing_document.title = f"Gmail: {subject}"
+                existing_document.title = subject
                 existing_document.content = summary_content
                 existing_document.content_hash = content_hash
                 existing_document.embedding = summary_embedding
@@ -375,7 +375,7 @@ async def _process_gmail_message_batch(
 
             document = Document(
                 search_space_id=search_space_id,
-                title=f"Gmail: {subject}",
+                title=subject,
                 document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["gmail"]),
                 document_metadata={
                     "message_id": message_id,
diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
index 78ff360ca..4b09f0790 100644
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@@ -346,7 +346,7 @@ async def index_composio_google_calendar(
 
                     chunks = await create_document_chunks(markdown_content)
 
-                    existing_document.title = f"Calendar: {summary}"
+                    existing_document.title = summary
                     existing_document.content = summary_content
                     existing_document.content_hash = content_hash
                     existing_document.embedding = summary_embedding
@@ -422,7 +422,7 @@ async def index_composio_google_calendar(
 
                 document = Document(
                     search_space_id=search_space_id,
-                    title=f"Calendar: {summary}",
+                    title=summary,
                     document_type=DocumentType(
                         TOOLKIT_TO_DOCUMENT_TYPE["googlecalendar"]
                     ),
diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py
index 66669e4e0..d7299fbfe 100644
--- a/surfsense_backend/app/connectors/composio_google_drive_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py
@@ -1276,7 +1276,7 @@ async def _process_single_drive_file(
 
         chunks = await create_document_chunks(markdown_content)
 
-        existing_document.title = f"Drive: {file_name}"
+        existing_document.title = file_name
         existing_document.content = summary_content
         existing_document.content_hash = content_hash
         existing_document.embedding = summary_embedding
@@ -1329,7 +1329,7 @@ async def _process_single_drive_file(
 
     document = Document(
         search_space_id=search_space_id,
-        title=f"Drive: {file_name}",
+        title=file_name,
         document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
         document_metadata={
             "file_id": file_id,
diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py
index 4c5599815..6967902d1 100644
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@@ -982,7 +982,7 @@ class ConnectorService:
         def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             channel_name = metadata.get("channel_name", "Unknown Channel")
             message_date = metadata.get("start_date", "")
-            title = f"Slack: {channel_name}"
+            title = channel_name
             if message_date:
                 title += f" ({message_date})"
             return title
@@ -1056,7 +1056,7 @@ class ConnectorService:
         def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             page_title = metadata.get("page_title", "Untitled Page")
             indexed_at = metadata.get("indexed_at", "")
-            title = f"Notion: {page_title}"
+            title = page_title
             if indexed_at:
                 title += f" (indexed: {indexed_at})"
             return title
@@ -1366,9 +1366,9 @@ class ConnectorService:
             issue_title = metadata.get("issue_title", "Untitled Issue")
             issue_state = metadata.get("state", "")
             title = (
-                f"Linear: {issue_identifier} - {issue_title}"
+                f"{issue_identifier} - {issue_title}"
                 if issue_identifier
-                else f"Linear: {issue_title}"
+                else issue_title
             )
             if issue_state:
                 title += f" ({issue_state})"
@@ -1466,9 +1466,9 @@ class ConnectorService:
             issue_title = metadata.get("issue_title", "Untitled Issue")
             status = metadata.get("status", "")
             title = (
-                f"Jira: {issue_key} - {issue_title}"
+                f"{issue_key} - {issue_title}"
                 if issue_key
-                else f"Jira: {issue_title}"
+                else issue_title
             )
             if status:
                 title += f" ({status})"
@@ -1570,7 +1570,7 @@ class ConnectorService:
         def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             event_summary = metadata.get("event_summary", "Untitled Event")
             start_time = metadata.get("start_time", "")
-            title = f"Calendar: {event_summary}"
+            title = event_summary
             if start_time:
                 title += f" ({start_time})"
             return title
@@ -1675,7 +1675,7 @@ class ConnectorService:
 
         def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             record_id = metadata.get("record_id", "")
-            return f"Airtable Record: {record_id}" if record_id else "Airtable Record"
+            return record_id if record_id else "Airtable Record"
 
         def _description_fn(
             _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
@@ -1952,7 +1952,7 @@ class ConnectorService:
         def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             page_title = metadata.get("page_title", "Untitled Page")
             space_key = metadata.get("space_key", "")
-            title = f"Confluence: {page_title}"
+            title = page_title
             if space_key:
                 title += f" ({space_key})"
             return title
@@ -2238,7 +2238,7 @@ class ConnectorService:
         def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             channel_name = metadata.get("channel_name", "Unknown Channel")
             message_date = metadata.get("start_date", "")
-            title = f"Discord: {channel_name}"
+            title = channel_name
             if message_date:
                 title += f" ({message_date})"
             return title
@@ -2314,7 +2314,7 @@ class ConnectorService:
             team_name = metadata.get("team_name", "Unknown Team")
             channel_name = metadata.get("channel_name", "Unknown Channel")
             message_date = metadata.get("start_date", "")
-            title = f"Teams: {team_name} - {channel_name}"
+            title = f"{team_name} - {channel_name}"
             if message_date:
                 title += f" ({message_date})"
             return title
@@ -2388,9 +2388,9 @@ class ConnectorService:
             event_name = metadata.get("event_name", "Untitled Event")
             start_time = metadata.get("start_time", "")
             return (
-                f"Luma: {event_name} ({start_time})"
+                f"{event_name} ({start_time})"
                 if start_time
-                else f"Luma: {event_name}"
+                else event_name
             )
 
         def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
@@ -2651,7 +2651,7 @@ class ConnectorService:
 
         def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             page_name = metadata.get("page_name", "Untitled Page")
-            return f"BookStack: {page_name}"
+            return page_name
 
         def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             page_slug = metadata.get("page_slug", "")
diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
index 029c4a87c..54b1afd26 100644
--- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
@@ -327,9 +327,7 @@ async def index_airtable_records(
                                     )
 
                                     # Update existing document
-                                    existing_document.title = (
-                                        f"Airtable Record: {record_id}"
-                                    )
+                                    existing_document.title = record_id
                                     existing_document.content = summary_content
                                     existing_document.content_hash = content_hash
                                     existing_document.embedding = summary_embedding
@@ -405,7 +403,7 @@ async def index_airtable_records(
                             )
                             document = Document(
                                 search_space_id=search_space_id,
-                                title=f"Airtable Record: {record_id}",
+                                title=record_id,
                                 document_type=DocumentType.AIRTABLE_CONNECTOR,
                                 document_metadata={
                                     "record_id": record_id,
diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
index fe608a8c9..f1338564e 100644
--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@@ -317,7 +317,7 @@ async def index_bookstack_pages(
                         chunks = await create_document_chunks(full_content)
 
                         # Update existing document
-                        existing_document.title = f"BookStack - {page_name}"
+                        existing_document.title = page_name
                         existing_document.content = summary_content
                         existing_document.content_hash = content_hash
                         existing_document.embedding = summary_embedding
@@ -387,7 +387,7 @@ async def index_bookstack_pages(
                 logger.info(f"Creating new document for page {page_name}")
                 document = Document(
                     search_space_id=search_space_id,
-                    title=f"BookStack - {page_name}",
+                    title=page_name,
                     document_type=DocumentType.BOOKSTACK_CONNECTOR,
                     document_metadata=doc_metadata,
                     content=summary_content,
diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
index a8991647c..2b8789e0c 100644
--- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
@@ -299,7 +299,7 @@ async def index_clickup_tasks(
                             chunks = await create_document_chunks(task_content)
 
                             # Update existing document
-                            existing_document.title = f"Task - {task_name}"
+                            existing_document.title = task_name
                             existing_document.content = summary_content
                             existing_document.content_hash = content_hash
                             existing_document.embedding = summary_embedding
@@ -376,7 +376,7 @@ async def index_clickup_tasks(
 
                     document = Document(
                         search_space_id=search_space_id,
-                        title=f"Task - {task_name}",
+                        title=task_name,
                         document_type=DocumentType.CLICKUP_CONNECTOR,
                         document_metadata={
                             "task_id": task_id,
diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
index 24859e685..74b4cc23d 100644
--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@@ -307,7 +307,7 @@ async def index_confluence_pages(
                         chunks = await create_document_chunks(full_content)
 
                         # Update existing document
-                        existing_document.title = f"Confluence - {page_title}"
+                        existing_document.title = page_title
                         existing_document.content = summary_content
                         existing_document.content_hash = content_hash
                         existing_document.embedding = summary_embedding
@@ -387,7 +387,7 @@ async def index_confluence_pages(
                 logger.info(f"Creating new document for page {page_title}")
                 document = Document(
                     search_space_id=search_space_id,
-                    title=f"Confluence - {page_title}",
+                    title=page_title,
                     document_type=DocumentType.CONFLUENCE_CONNECTOR,
                     document_metadata={
                         "page_id": page_id,
diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
index 4999ba6d4..f9a6918a7 100644
--- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
@@ -507,7 +507,7 @@ async def index_discord_messages(
                             # Create and store new document
                             document = Document(
                                 search_space_id=search_space_id,
-                                title=f"Discord - {guild_name}#{channel_name}",
+                                title=f"{guild_name}#{channel_name}",
                                 document_type=DocumentType.DISCORD_CONNECTOR,
                                 document_metadata={
                                     "guild_name": guild_name,
diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
index d82f18944..848db7623 100644
--- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
@@ -418,7 +418,7 @@ async def _process_repository_digest(
     }
 
     document = Document(
-        title=f"GitHub Repository: {repo_full_name}",
+        title=repo_full_name,
         document_type=DocumentType.GITHUB_CONNECTOR,
         document_metadata=doc_metadata,
         content=summary_text,
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index 386c9de43..8d7b8b045 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -396,7 +396,7 @@ async def index_google_calendar_events(
                         chunks = await create_document_chunks(event_markdown)
 
                         # Update existing document
-                        existing_document.title = f"Calendar Event - {event_summary}"
+                        existing_document.title = event_summary
                         existing_document.content = summary_content
                         existing_document.content_hash = content_hash
                         existing_document.embedding = summary_embedding
@@ -482,7 +482,7 @@ async def index_google_calendar_events(
 
                 document = Document(
                     search_space_id=search_space_id,
-                    title=f"Calendar Event - {event_summary}",
+                    title=event_summary,
                     document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR,
                     document_metadata={
                         "event_id": event_id,
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
index 34d06d796..805be5781 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@@ -332,7 +332,7 @@ async def index_google_gmail_messages(
                         chunks = await create_document_chunks(markdown_content)
 
                         # Update existing document
-                        existing_document.title = f"Gmail: {subject}"
+                        existing_document.title = subject
                         existing_document.content = summary_content
                         existing_document.content_hash = content_hash
                         existing_document.embedding = summary_embedding
@@ -405,7 +405,7 @@ async def index_google_gmail_messages(
                 logger.info(f"Creating new document for Gmail message: {subject}")
                 document = Document(
                     search_space_id=search_space_id,
-                    title=f"Gmail: {subject}",
+                    title=subject,
                     document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
                     document_metadata={
                         "message_id": message_id,
diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
index 6971703c1..508834b4f 100644
--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@@ -282,9 +282,7 @@ async def index_jira_issues(
                         chunks = await create_document_chunks(issue_content)
 
                         # Update existing document
-                        existing_document.title = (
-                            f"Jira - {issue_identifier}: {issue_title}"
-                        )
+                        existing_document.title = f"{issue_identifier}: {issue_title}"
                         existing_document.content = summary_content
                         existing_document.content_hash = content_hash
                         existing_document.embedding = summary_embedding
@@ -364,7 +362,7 @@ async def index_jira_issues(
                 )
                 document = Document(
                     search_space_id=search_space_id,
-                    title=f"Jira - {issue_identifier}: {issue_title}",
+                    title=f"{issue_identifier}: {issue_title}",
                     document_type=DocumentType.JIRA_CONNECTOR,
                     document_metadata={
                         "issue_id": issue_id,
diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
index a94420bc2..c28f151ca 100644
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@@ -314,9 +314,7 @@ async def index_linear_issues(
                         chunks = await create_document_chunks(issue_content)
 
                         # Update existing document
-                        existing_document.title = (
-                            f"Linear - {issue_identifier}: {issue_title}"
-                        )
+                        existing_document.title = f"{issue_identifier}: {issue_title}"
                         existing_document.content = summary_content
                         existing_document.content_hash = content_hash
                         existing_document.embedding = summary_embedding
@@ -397,7 +395,7 @@ async def index_linear_issues(
                 )
                 document = Document(
                     search_space_id=search_space_id,
-                    title=f"Linear - {issue_identifier}: {issue_title}",
+                    title=f"{issue_identifier}: {issue_title}",
                     document_type=DocumentType.LINEAR_CONNECTOR,
                     document_metadata={
                         "issue_id": issue_id,
diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
index c0eb58d1d..f4527843c 100644
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@@ -361,7 +361,7 @@ async def index_luma_events(
                         chunks = await create_document_chunks(event_markdown)
 
                         # Update existing document
-                        existing_document.title = f"Luma Event - {event_name}"
+                        existing_document.title = event_name
                         existing_document.content = summary_content
                         existing_document.content_hash = content_hash
                         existing_document.embedding = summary_embedding
@@ -455,7 +455,7 @@ async def index_luma_events(
 
                 document = Document(
                     search_space_id=search_space_id,
-                    title=f"Luma Event - {event_name}",
+                    title=event_name,
                     document_type=DocumentType.LUMA_CONNECTOR,
                     document_metadata={
                         "event_id": event_id,
diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
index b1adeb035..8d4d7650a 100644
--- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
@@ -387,7 +387,7 @@ async def index_notion_pages(
                         chunks = await create_document_chunks(markdown_content)
 
                         # Update existing document
-                        existing_document.title = f"Notion - {page_title}"
+                        existing_document.title = page_title
                         existing_document.content = summary_content
                         existing_document.content_hash = content_hash
                         existing_document.embedding = summary_embedding
@@ -458,7 +458,7 @@ async def index_notion_pages(
                 # Create and store new document
                 document = Document(
                     search_space_id=search_space_id,
-                    title=f"Notion - {page_title}",
+                    title=page_title,
                     document_type=DocumentType.NOTION_CONNECTOR,
                     document_metadata={
                         "page_title": page_title,
diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
index 3cb4e3c85..010d1eff4 100644
--- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
@@ -373,7 +373,7 @@ async def index_slack_messages(
                     # Create and store new document
                     document = Document(
                         search_space_id=search_space_id,
-                        title=f"Slack - {channel_name}",
+                        title=channel_name,
                         document_type=DocumentType.SLACK_CONNECTOR,
                         document_metadata={
                             "channel_name": channel_name,
diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
index 1e26fbc42..d42c5b7f1 100644
--- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
@@ -410,7 +410,7 @@ async def index_teams_messages(
                             # Create and store new document
                             document = Document(
                                 search_space_id=search_space_id,
-                                title=f"Teams - {team_name} - {channel_name}",
+                                title=f"{team_name} - {channel_name}",
                                 document_type=DocumentType.TEAMS_CONNECTOR,
                                 document_metadata={
                                     "team_name": team_name,

From aef59d04ebaf3955f9a03fceeb96b3e138738f01 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Thu, 5 Feb 2026 21:59:31 +0530
Subject: [PATCH 15/36] feat: add document status management with JSONB column
 for processing states in documents

---
 .../versions/92_add_document_status_column.py |  80 ++++++
 .../composio_google_calendar_connector.py     | 243 ++++++++++--------
 surfsense_backend/app/db.py                   |  85 ++++++
 .../app/routes/documents_routes.py            |  28 ++
 surfsense_backend/app/schemas/__init__.py     |   2 +
 surfsense_backend/app/schemas/documents.py    |   7 +
 .../app/tasks/connector_indexers/base.py      |  28 ++
 .../components/DocumentsTableShell.tsx        |  88 ++++++-
 .../(manage)/components/RowActions.tsx        |  39 +--
 .../documents/(manage)/components/types.ts    |   7 +
 .../documents/(manage)/page.tsx               |  31 ++-
 surfsense_web/hooks/use-documents.ts          |  15 +-
 surfsense_web/lib/electric/client.ts          |   8 +-
 13 files changed, 526 insertions(+), 135 deletions(-)
 create mode 100644 surfsense_backend/alembic/versions/92_add_document_status_column.py

diff --git a/surfsense_backend/alembic/versions/92_add_document_status_column.py b/surfsense_backend/alembic/versions/92_add_document_status_column.py
new file mode 100644
index 000000000..550faa3c3
--- /dev/null
+++ b/surfsense_backend/alembic/versions/92_add_document_status_column.py
@@ -0,0 +1,80 @@
+"""Add status column to documents table for per-document processing status
+
+Revision ID: 92
+Revises: 91
+Create Date: 2026-02-05
+
+Changes:
+1. Add status column (JSONB) to documents table
+2. Default value is {"state": "ready"} for backward compatibility
+3. Existing documents are set to ready status
+4. Index created for efficient status filtering
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "92"
+down_revision: str | None = "91"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add a NOT NULL JSONB status column (default ready state) to documents and index its state."""
+
+    # 1. Add status column only if it is absent (safe to re-run); default is {"state": "ready"}
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF NOT EXISTS (
+                SELECT 1 FROM information_schema.columns
+                WHERE table_name = 'documents' AND column_name = 'status'
+            ) THEN
+                ALTER TABLE documents
+                ADD COLUMN status JSONB NOT NULL DEFAULT '{"state": "ready"}'::jsonb;
+            END IF;
+        END$$;
+        """
+    )
+
+    # 2. Expression index on status->>'state' so queries filtering by state can use an index
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS ix_documents_status
+        ON documents ((status->>'state'));
+        """
+    )
+
+
+def downgrade() -> None:
+    """Remove the status index and the status column from documents, tolerating their absence."""
+
+    # Drop the index created by upgrade(); IF EXISTS makes this a no-op when it is already gone
+    op.execute(
+        """
+        DROP INDEX IF EXISTS ix_documents_status;
+        """
+    )
+
+    # Drop the column only when it is present, using the same information_schema check as upgrade()
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF EXISTS (
+                SELECT 1 FROM information_schema.columns
+                WHERE table_name = 'documents' AND column_name = 'status'
+            ) THEN
+                ALTER TABLE documents
+                DROP COLUMN status;
+            END IF;
+        END$$;
+        """
+    )
+
diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
index 4b09f0790..dc9c18c99 100644
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@@ -16,13 +16,14 @@ from sqlalchemy.orm import selectinload
 
 from app.config import config
 from app.connectors.composio_connector import ComposioConnector
-from app.db import Document, DocumentType
+from app.db import Document, DocumentStatus, DocumentType
 from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.connector_indexers.base import (
     calculate_date_range,
     check_duplicate_document_by_hash,
+    safe_set_chunks,
 )
 from app.utils.document_converters import (
     create_document_chunks,
@@ -266,18 +267,18 @@ async def index_composio_google_calendar(
 
         documents_indexed = 0
         documents_skipped = 0
-        duplicate_content_count = (
-            0  # Track events skipped due to duplicate content_hash
-        )
+        documents_failed = 0  # Track events that failed processing
+        duplicate_content_count = 0  # Track events skipped due to duplicate content_hash
         last_heartbeat_time = time.time()
 
+        # =======================================================================
+        # PHASE 1: Analyze all events, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        events_to_process = []  # List of dicts with document and event data
+        new_documents_created = False
+
         for event in events:
-            # Send heartbeat periodically to indicate task is still alive
-            if on_heartbeat_callback:
-                current_time = time.time()
-                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
-                    await on_heartbeat_callback(documents_indexed)
-                    last_heartbeat_time = current_time
             try:
                 # Handle both standard Google API and potential Composio variations
                 event_id = event.get("id", "") or event.get("eventId", "")
@@ -315,61 +316,24 @@ async def index_composio_google_calendar(
 
                 if existing_document:
                     if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
 
-                    # Update existing
-                    user_llm = await get_user_long_context_llm(
-                        session, user_id, search_space_id
-                    )
-
-                    if user_llm:
-                        document_metadata = {
-                            "event_id": event_id,
-                            "summary": summary,
-                            "start_time": start_time,
-                            "document_type": "Google Calendar Event (Composio)",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            markdown_content, user_llm, document_metadata
-                        )
-                    else:
-                        summary_content = f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
-                        if location:
-                            summary_content += f"\nLocation: {location}"
-                        summary_embedding = config.embedding_model_instance.embed(
-                            summary_content
-                        )
-
-                    chunks = await create_document_chunks(markdown_content)
-
-                    existing_document.title = summary
-                    existing_document.content = summary_content
-                    existing_document.content_hash = content_hash
-                    existing_document.embedding = summary_embedding
-                    existing_document.document_metadata = {
-                        "event_id": event_id,
-                        "summary": summary,
-                        "start_time": start_time,
-                        "end_time": end_time,
-                        "location": location,
-                        "connector_id": connector_id,
-                        "source": "composio",
-                    }
-                    existing_document.chunks = chunks
-                    existing_document.updated_at = get_current_timestamp()
-
-                    documents_indexed += 1
-
-                    # Batch commit every 10 documents
-                    if documents_indexed % 10 == 0:
-                        logger.info(
-                            f"Committing batch: {documents_indexed} Google Calendar events processed so far"
-                        )
-                        await session.commit()
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    events_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'markdown_content': markdown_content,
+                        'content_hash': content_hash,
+                        'event_id': event_id,
+                        'summary': summary,
+                        'start_time': start_time,
+                        'end_time': end_time,
+                        'location': location,
+                    })
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -380,46 +344,16 @@ async def index_composio_google_calendar(
                     )
 
                 if duplicate_by_content:
-                    # A document with the same content already exists (likely from standard connector)
                     logger.info(
                         f"Event {summary} already indexed by another connector "
                         f"(existing document ID: {duplicate_by_content.id}, "
-                        f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content."
+                        f"type: {duplicate_by_content.document_type}). Skipping."
                     )
                     duplicate_content_count += 1
                     documents_skipped += 1
                     continue
 
-                # Create new document
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "event_id": event_id,
-                        "summary": summary,
-                        "start_time": start_time,
-                        "document_type": "Google Calendar Event (Composio)",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        markdown_content, user_llm, document_metadata
-                    )
-                else:
-                    summary_content = (
-                        f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
-                    )
-                    if location:
-                        summary_content += f"\nLocation: {location}"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                chunks = await create_document_chunks(markdown_content)
-
+                # Create new document with PENDING status (visible in UI immediately)
                 document = Document(
                     search_space_id=search_space_id,
                     title=summary,
@@ -436,19 +370,107 @@ async def index_composio_google_calendar(
                         "toolkit_id": "googlecalendar",
                         "source": "composio",
                     },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                     unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                     updated_at=get_current_timestamp(),
                     created_by_id=user_id,
                     connector_id=connector_id,
                 )
                 session.add(document)
+                new_documents_created = True
+
+                events_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'markdown_content': markdown_content,
+                    'content_hash': content_hash,
+                    'event_id': event_id,
+                    'summary': summary,
+                    'start_time': start_time,
+                    'end_time': end_time,
+                    'location': location,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(events_to_process)} documents")
+
+        for item in events_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "event_id": item['event_id'],
+                        "summary": item['summary'],
+                        "start_time": item['start_time'],
+                        "document_type": "Google Calendar Event (Composio)",
+                    }
+                    summary_content, summary_embedding = await generate_document_summary(
+                        item['markdown_content'], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}"
+                    if item['location']:
+                        summary_content += f"\nLocation: {item['location']}"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item['markdown_content'])
+
+                # Update document to READY with actual content
+                document.title = item['summary']
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "event_id": item['event_id'],
+                    "summary": item['summary'],
+                    "start_time": item['start_time'],
+                    "end_time": item['end_time'],
+                    "location": item['location'],
+                    "connector_id": connector_id,
+                    "source": "composio",
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
                 documents_indexed += 1
 
-                # Batch commit every 10 documents
+                # Batch commit every 10 documents (for ready status updates)
                 if documents_indexed % 10 == 0:
                     logger.info(
                         f"Committing batch: {documents_indexed} Google Calendar events processed so far"
@@ -457,7 +479,13 @@ async def index_composio_google_calendar(
 
             except Exception as e:
                 logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
-                documents_skipped += 1
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
                 continue
 
         # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
@@ -490,10 +518,13 @@ async def index_composio_google_calendar(
             else:
                 raise
 
-        # Build warning message if duplicates were found
-        warning_message = None
+        # Build warning message if there were issues
+        warning_parts = []
         if duplicate_content_count > 0:
-            warning_message = f"{duplicate_content_count} skipped (duplicate)"
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         await task_logger.log_task_success(
             log_entry,
@@ -501,13 +532,15 @@ async def index_composio_google_calendar(
             {
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                 "duplicate_content_count": duplicate_content_count,
             },
         )
 
         logger.info(
-            f"Composio Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
-            f"({duplicate_content_count} due to duplicate content from other connectors)"
+            f"Composio Google Calendar indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
         )
         return documents_indexed, warning_message
 
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index 5cdb712db..fb5c711ed 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -100,6 +100,80 @@ class PodcastStatus(str, Enum):
     FAILED = "failed"
 
 
+class DocumentStatus:
+    """
+    Helper class for document processing status (stored as JSONB).
+    
+    Status values:
+    - {"state": "ready"} - Document is fully processed and searchable
+    - {"state": "pending"} - Document is queued, waiting to be processed
+    - {"state": "processing"} - Document is currently being processed (only 1 at a time)
+    - {"state": "failed", "reason": "..."} - Processing failed with reason
+    
+    Usage:
+        document.status = DocumentStatus.pending()
+        document.status = DocumentStatus.processing()
+        document.status = DocumentStatus.ready()
+        document.status = DocumentStatus.failed("LLM rate limit exceeded")
+    """
+    
+    # State constants
+    READY = "ready"
+    PENDING = "pending"
+    PROCESSING = "processing"
+    FAILED = "failed"
+    
+    @staticmethod
+    def ready() -> dict:
+        """Return status dict for a ready/searchable document."""
+        return {"state": DocumentStatus.READY}
+    
+    @staticmethod
+    def pending() -> dict:
+        """Return status dict for a document waiting to be processed."""
+        return {"state": DocumentStatus.PENDING}
+    
+    @staticmethod
+    def processing() -> dict:
+        """Return status dict for a document being processed."""
+        return {"state": DocumentStatus.PROCESSING}
+    
+    @staticmethod
+    def failed(reason: str, **extra_details) -> dict:
+        """
+        Return status dict for a failed document.
+        
+        Args:
+            reason: Human-readable failure reason (stored truncated to 500 characters)
+            **extra_details: Optional extra keys merged into the dict (duplicate_of, error_code, etc.)
+        """
+        status = {"state": DocumentStatus.FAILED, "reason": reason[:500]}  # Truncate long reasons
+        if extra_details:
+            status.update(extra_details)
+        return status
+    
+    @staticmethod
+    def get_state(status: dict | None) -> str | None:
+        """Return the "state" value, or None when status is None, not a dict, or has no "state" key."""
+        if status is None:
+            return None
+        return status.get("state") if isinstance(status, dict) else None
+    
+    @staticmethod
+    def is_state(status: dict | None, state: str) -> bool:
+        """Check if status matches a given state."""
+        return DocumentStatus.get_state(status) == state
+    
+    @staticmethod
+    def get_failure_reason(status: dict | None) -> str | None:
+        """Return the "reason" of a failed status; None for any other state or a non-dict status."""
+        if status is None or not isinstance(status, dict):
+            return None
+        if status.get("state") == DocumentStatus.FAILED:
+            return status.get("reason")
+        return None
+
+
 class LiteLLMProvider(str, Enum):
     """
     Enum for LLM providers supported by LiteLLM.
@@ -785,6 +859,17 @@ class Document(BaseModel, TimestampMixin):
         index=True,
     )
 
+    # Processing status for real-time visibility (JSONB)
+    # Format: {"state": "ready" | "pending" | "processing"} or {"state": "failed", "reason": "..."}
+    # Default to {"state": "ready"} for backward compatibility with existing documents
+    status = Column(
+        JSONB,
+        nullable=False,
+        default=DocumentStatus.ready,
+        server_default=text("'{\"state\": \"ready\"}'::jsonb"),
+        index=True,
+    )
+
     # Relationships
     search_space = relationship("SearchSpace", back_populates="documents")
     created_by = relationship("User", back_populates="documents")
diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py
index d25a2db48..b905ebf91 100644
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@@ -19,6 +19,7 @@ from app.db import (
 from app.schemas import (
     DocumentRead,
     DocumentsCreate,
+    DocumentStatusSchema,
     DocumentTitleRead,
     DocumentTitleSearchResponse,
     DocumentUpdate,
@@ -271,6 +272,14 @@ async def read_documents(
             if doc.created_by:
                 created_by_name = doc.created_by.display_name or doc.created_by.email
             
+            # Parse status from JSONB
+            status_data = None
+            if hasattr(doc, 'status') and doc.status:
+                status_data = DocumentStatusSchema(
+                    state=doc.status.get("state", "ready"),
+                    reason=doc.status.get("reason"),
+                )
+            
             api_documents.append(
                 DocumentRead(
                     id=doc.id,
@@ -285,6 +294,7 @@ async def read_documents(
                     search_space_id=doc.search_space_id,
                     created_by_id=doc.created_by_id,
                     created_by_name=created_by_name,
+                    status=status_data,
                 )
             )
 
@@ -417,6 +427,14 @@ async def search_documents(
             if doc.created_by:
                 created_by_name = doc.created_by.display_name or doc.created_by.email
             
+            # Parse status from JSONB
+            status_data = None
+            if hasattr(doc, 'status') and doc.status:
+                status_data = DocumentStatusSchema(
+                    state=doc.status.get("state", "ready"),
+                    reason=doc.status.get("reason"),
+                )
+            
             api_documents.append(
                 DocumentRead(
                     id=doc.id,
@@ -431,6 +449,7 @@ async def search_documents(
                     search_space_id=doc.search_space_id,
                     created_by_id=doc.created_by_id,
                     created_by_name=created_by_name,
+                    status=status_data,
                 )
             )
 
@@ -806,6 +825,7 @@ async def delete_document(
     """
     Delete a document.
     Requires DOCUMENTS_DELETE permission for the search space.
+    Documents in "pending" or "processing" state cannot be deleted (409 Conflict).
     """
     try:
         result = await session.execute(
@@ -818,6 +838,14 @@ async def delete_document(
                 status_code=404, detail=f"Document with id {document_id} not found"
             )
 
+        # Check if document is pending or currently being processed
+        doc_state = document.status.get("state") if document.status else None
+        if doc_state in ("pending", "processing"):
+            raise HTTPException(
+                status_code=409,  # Conflict
+                detail="Cannot delete document while it is pending or being processed. Please wait for processing to complete.",
+            )
+
         # Check permission for the search space
         await check_permission(
             session,
diff --git a/surfsense_backend/app/schemas/__init__.py b/surfsense_backend/app/schemas/__init__.py
index 6c9577c46..e4296d501 100644
--- a/surfsense_backend/app/schemas/__init__.py
+++ b/surfsense_backend/app/schemas/__init__.py
@@ -4,6 +4,7 @@ from .documents import (
     DocumentBase,
     DocumentRead,
     DocumentsCreate,
+    DocumentStatusSchema,
     DocumentTitleRead,
     DocumentTitleSearchResponse,
     DocumentUpdate,
@@ -87,6 +88,7 @@ __all__ = [
     # Document schemas
     "DocumentBase",
     "DocumentRead",
+    "DocumentStatusSchema",
     "DocumentTitleRead",
     "DocumentTitleSearchResponse",
     "DocumentUpdate",
diff --git a/surfsense_backend/app/schemas/documents.py b/surfsense_backend/app/schemas/documents.py
index ad1907b90..7d85d0229 100644
--- a/surfsense_backend/app/schemas/documents.py
+++ b/surfsense_backend/app/schemas/documents.py
@@ -41,6 +41,12 @@ class DocumentUpdate(DocumentBase):
     pass
 
 
+class DocumentStatusSchema(BaseModel):
+    """Document processing status."""
+    state: str  # "ready", "pending", "processing", "failed"
+    reason: str | None = None  # Failure reason; expected only when state is "failed"
+
+
 class DocumentRead(BaseModel):
     id: int
     title: str
@@ -54,6 +60,7 @@ class DocumentRead(BaseModel):
     search_space_id: int
     created_by_id: UUID | None = None  # User who created/uploaded this document
     created_by_name: str | None = None  # Display name or email of the user who created this document
+    status: DocumentStatusSchema | None = None  # Processing status (ready, pending, processing, failed)
 
     model_config = ConfigDict(from_attributes=True)
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/base.py b/surfsense_backend/app/tasks/connector_indexers/base.py
index b390937f0..b5b4e5559 100644
--- a/surfsense_backend/app/tasks/connector_indexers/base.py
+++ b/surfsense_backend/app/tasks/connector_indexers/base.py
@@ -28,6 +28,34 @@ def get_current_timestamp() -> datetime:
     return datetime.now(UTC)
 
 
+def safe_set_chunks(document: Document, chunks: list) -> None:
+    """
+    Safely assign chunks to a document without triggering lazy loading.
+    
+    ALWAYS use this instead of `document.chunks = chunks` to avoid
+    SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
+    
+    Why this is needed:
+    - Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
+      load the OLD chunks first (for comparison/orphan detection)
+    - This lazy loading fails in async context with asyncpg driver
+    - set_committed_value bypasses this by setting the value directly, so it
+      is safe however the document was loaded (with or without selectinload)
+    
+    NOTE(review): set_committed_value records no attribute history; confirm the new chunks still get persisted at flush.
+    
+    Args:
+        document: The Document object to update
+        chunks: List of Chunk objects to assign
+    
+    Example:
+        # Instead of: document.chunks = chunks (DANGEROUS!)
+        safe_set_chunks(document, chunks)  # Always safe
+    """
+    from sqlalchemy.orm.attributes import set_committed_value
+    set_committed_value(document, 'chunks', chunks)
+
+
 async def check_duplicate_document_by_hash(
     session: AsyncSession, content_hash: str
 ) -> Document | None:
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index e79b4d104..db77c6472 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -1,7 +1,7 @@
 "use client";
 
 import { formatDistanceToNow } from "date-fns";
-import { Calendar, ChevronDown, ChevronUp, FileText, FileX, Loader2, Network, Plus, User } from "lucide-react";
+import { AlertCircle, Calendar, CheckCircle2, ChevronDown, ChevronUp, Clock, FileText, FileX, Loader2, Network, Plus, User } from "lucide-react";
 import { motion } from "motion/react";
 import { useTranslations } from "next-intl";
 import React, { useRef, useState, useEffect, useCallback } from "react";
@@ -17,6 +17,7 @@ import {
 	DialogTitle,
 } from "@/components/ui/dialog";
 import { Skeleton } from "@/components/ui/skeleton";
+import { Spinner } from "@/components/ui/spinner";
 import {
 	Table,
 	TableBody,
@@ -29,7 +30,61 @@ import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { DocumentTypeChip } from "./DocumentTypeIcon";
 import { RowActions } from "./RowActions";
-import type { ColumnVisibility, Document } from "./types";
+import type { ColumnVisibility, Document, DocumentStatus } from "./types";
+
+// Renders an icon with a tooltip for a document's processing state; a missing status is shown as "ready".
+function StatusIndicator({ status }: { status?: DocumentStatus }) {
+	const state = status?.state ?? "ready";
+	
+	switch (state) {
+		case "pending":
+			return (
+				<Tooltip>
+					<TooltipTrigger asChild>
+						<div className="flex items-center justify-center">
+							<Clock className="h-5 w-5 text-muted-foreground" />
+						</div>
+					</TooltipTrigger>
+					<TooltipContent side="top">Pending - waiting to be processed</TooltipContent>
+				</Tooltip>
+			);
+		case "processing":
+			return (
+				<Tooltip>
+					<TooltipTrigger asChild>
+						<div className="flex items-center justify-center">
+							<Spinner size="sm" className="text-primary" />
+						</div>
+					</TooltipTrigger>
+					<TooltipContent side="top">Processing...</TooltipContent>
+				</Tooltip>
+			);
+		case "failed":
+			return (
+				<Tooltip>
+					<TooltipTrigger asChild>
+						<div className="flex items-center justify-center">
+							<AlertCircle className="h-5 w-5 text-destructive" />
+						</div>
+					</TooltipTrigger>
+					<TooltipContent side="top" className="max-w-xs">
+						{status?.reason || "Processing failed"}
+					</TooltipContent>
+				</Tooltip>
+			);
+		case "ready":
+			return (
+				<Tooltip>
+					<TooltipTrigger asChild>
+						<div className="flex items-center justify-center">
+							<CheckCircle2 className="h-5 w-5 text-muted-foreground/60" />
+						</div>
+					</TooltipTrigger>
+					<TooltipContent side="top">Ready</TooltipContent>
+				</Tooltip>
+			);
+	} // no default branch: the four cases above are the only states in the DocumentStatus type
+}
 
 export type SortKey = keyof Pick<Document, "title" | "document_type" | "created_at">;
 
@@ -460,7 +515,7 @@ export function DocumentsTableShell({
 										</TableHead>
 									)}
 									{columnVisibility.created_at && (
-										<TableHead className="w-32">
+										<TableHead className="w-32 border-r border-border/40">
 											<SortableHeader
 												sortKey="created_at"
 												currentSortKey={sortKey}
@@ -472,6 +527,13 @@ export function DocumentsTableShell({
 											</SortableHeader>
 										</TableHead>
 									)}
+									{columnVisibility.status && (
+										<TableHead className="w-20 text-center">
+											<span className="text-sm font-medium text-muted-foreground/70">
+												Status
+											</span>
+										</TableHead>
+									)}
 									<TableHead className="w-10">
 										<span className="sr-only">Actions</span>
 									</TableHead>
@@ -552,7 +614,7 @@ export function DocumentsTableShell({
 													</TableCell>
 												)}
 												{columnVisibility.created_at && (
-													<TableCell className="w-32 py-2.5 text-sm text-foreground">
+													<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40">
 														<Tooltip>
 															<TooltipTrigger asChild>
 																<span className="cursor-default">{formatRelativeDate(doc.created_at)}</span>
@@ -563,6 +625,11 @@ export function DocumentsTableShell({
 														</Tooltip>
 													</TableCell>
 												)}
+												{columnVisibility.status && (
+													<TableCell className="w-20 py-2.5 text-center">
+														<StatusIndicator status={doc.status} />
+													</TableCell>
+												)}
 												<TableCell className="w-10 py-2.5 text-center">
 													<RowActions
 														document={doc}
@@ -647,11 +714,14 @@ export function DocumentsTableShell({
 												)}
 											</div>
 										</div>
-										<RowActions
-											document={doc}
-											deleteDocument={deleteDocument}
-											searchSpaceId={searchSpaceId}
-										/>
+										<div className="flex items-center gap-2">
+											{columnVisibility.status && <StatusIndicator status={doc.status} />}
+											<RowActions
+												document={doc}
+												deleteDocument={deleteDocument}
+												searchSpaceId={searchSpaceId}
+											/>
+										</div>
 									</div>
 								</motion.div>
 							);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
index 88077581c..96049baf5 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@@ -45,10 +45,17 @@ export function RowActions({
 		document.document_type as (typeof EDITABLE_DOCUMENT_TYPES)[number]
 	);
 
-	const isDeletable = !NON_DELETABLE_DOCUMENT_TYPES.includes(
+	// Documents in "pending" or "processing" state should show disabled delete
+	const isBeingProcessed = document.status?.state === "pending" || document.status?.state === "processing";
+
+	// SURFSENSE_DOCS are system-managed and should not show delete at all
+	const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes(
 		document.document_type as (typeof NON_DELETABLE_DOCUMENT_TYPES)[number]
 	);
 
+	// Delete is disabled while the document is pending or processing
+	const isDeleteDisabled = isBeingProcessed;
+
 	const handleDelete = async () => {
 		setIsDeleting(true);
 		try {
@@ -87,10 +94,11 @@ export function RowActions({
 								<Pencil className="mr-2 h-4 w-4" />
 								<span>Edit</span>
 							</DropdownMenuItem>
-							{isDeletable && (
+							{shouldShowDelete && (
 								<DropdownMenuItem
-									onClick={() => setIsDeleteOpen(true)}
-									className="text-destructive focus:text-destructive"
+									onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
+									disabled={isDeleteDisabled}
+									className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"}
 								>
 									<Trash2 className="mr-2 h-4 w-4" />
 									<span>Delete</span>
@@ -100,13 +108,13 @@ export function RowActions({
 					</DropdownMenu>
 				) : (
 					// Non-editable documents: show only delete button directly
-					isDeletable && (
+					shouldShowDelete && (
 						<Button
 							variant="ghost"
 							size="icon"
-							className="h-8 w-8 text-muted-foreground hover:text-destructive hover:bg-destructive/10"
-							onClick={() => setIsDeleteOpen(true)}
-							disabled={isDeleting}
+							className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground/50 cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
+							onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
+							disabled={isDeleting || isDeleteDisabled}
 						>
 							<Trash2 className="h-4 w-4" />
 							<span className="sr-only">Delete</span>
@@ -131,10 +139,11 @@ export function RowActions({
 								<Pencil className="mr-2 h-4 w-4" />
 								<span>Edit</span>
 							</DropdownMenuItem>
-							{isDeletable && (
+							{shouldShowDelete && (
 								<DropdownMenuItem
-									onClick={() => setIsDeleteOpen(true)}
-									className="text-destructive focus:text-destructive"
+									onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
+									disabled={isDeleteDisabled}
+									className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"}
 								>
 									<Trash2 className="mr-2 h-4 w-4" />
 									<span>Delete</span>
@@ -144,13 +153,13 @@ export function RowActions({
 					</DropdownMenu>
 				) : (
 					// Non-editable documents: show only delete button directly
-					isDeletable && (
+					shouldShowDelete && (
 						<Button
 							variant="ghost"
 							size="icon"
-							className="h-8 w-8 text-muted-foreground hover:text-destructive hover:bg-destructive/10"
-							onClick={() => setIsDeleteOpen(true)}
-							disabled={isDeleting}
+							className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground/50 cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
+							onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
+							disabled={isDeleting || isDeleteDisabled}
 						>
 							<Trash2 className="h-4 w-4" />
 							<span className="sr-only">Delete</span>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
index 5485be0ef..9dcf0ef00 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/types.ts
@@ -1,5 +1,10 @@
 export type DocumentType = string;
 
+export type DocumentStatus = {
+	state: "ready" | "pending" | "processing" | "failed";
+	reason?: string; // failure explanation; shown in the status tooltip when state is "failed"
+};
+
 export type Document = {
 	id: number;
 	title: string;
@@ -11,10 +16,12 @@ export type Document = {
 	search_space_id: number;
 	created_by_id?: string | null;
 	created_by_name?: string | null;
+	status?: DocumentStatus;
 };
 
 export type ColumnVisibility = {
 	document_type: boolean;
 	created_by: boolean;
 	created_at: boolean;
+	status: boolean;
 };
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
index ab92d1b94..babf4589d 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@@ -38,6 +38,7 @@ export default function DocumentsTable() {
 		document_type: true,
 		created_by: true,
 		created_at: true,
+		status: true,
 	});
 	const [pageIndex, setPageIndex] = useState(0);
 	const [sortKey, setSortKey] = useState<SortKey>("created_at");
@@ -115,6 +116,7 @@ export default function DocumentsTable() {
 				created_by_id: item.created_by_id ?? null,
 				created_by_name: item.created_by_name ?? null,
 				created_at: item.created_at,
+				status: (item as { status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string } }).status ?? { state: "ready" as const },
 			}))
 		: paginatedRealtimeDocuments;
 
@@ -159,10 +161,35 @@ export default function DocumentsTable() {
 			toast.error(t("no_rows_selected"));
 			return;
 		}
+
+		// Filter out pending/processing documents - they cannot be deleted
+		// For real-time mode, use sortedRealtimeDocuments (which has status)
+		// For search mode, use searchResponse items (need to safely access status)
+		const allDocs = isSearchMode 
+			? (searchResponse?.items || []).map(item => ({
+				id: item.id,
+				status: (item as { status?: { state: string } }).status,
+			}))
+			: sortedRealtimeDocuments.map(doc => ({ id: doc.id, status: doc.status }));
+		
+		const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id));
+		const deletableIds = selectedDocs
+			.filter((doc) => doc.status?.state !== "pending" && doc.status?.state !== "processing")
+			.map((doc) => doc.id);
+		const inProgressCount = selectedIds.size - deletableIds.length;
+
+		if (inProgressCount > 0) {
+			toast.warning(`${inProgressCount} document(s) are pending or processing and cannot be deleted.`);
+		}
+
+		if (deletableIds.length === 0) {
+			return;
+		}
+
 		try {
 			// Delete documents one by one using the mutation
 			const results = await Promise.all(
-				Array.from(selectedIds).map(async (id) => {
+				deletableIds.map(async (id) => {
 					try {
 						await deleteDocumentMutation({ id });
 						return true;
@@ -172,7 +199,7 @@ export default function DocumentsTable() {
 				})
 			);
 			const okCount = results.filter((r) => r === true).length;
-			if (okCount === selectedIds.size)
+			if (okCount === deletableIds.length)
 				toast.success(t("delete_success_count", { count: okCount }));
 			else toast.error(t("delete_partial_failed"));
 			
diff --git a/surfsense_web/hooks/use-documents.ts b/surfsense_web/hooks/use-documents.ts
index 99fb99e13..442c836b2 100644
--- a/surfsense_web/hooks/use-documents.ts
+++ b/surfsense_web/hooks/use-documents.ts
@@ -9,6 +9,12 @@ import { useElectricClient } from "@/lib/electric/context";
 // Stable empty array to prevent infinite re-renders when no typeFilter is provided
 const EMPTY_TYPE_FILTER: DocumentTypeEnum[] = [];
 
+// Document status type (matches backend DocumentStatus JSONB)
+export interface DocumentStatusType {
+	state: "ready" | "pending" | "processing" | "failed";
+	reason?: string; // failure reason; the backend helper sets it via DocumentStatus.failed()
+}
+
 // Document from Electric sync (lightweight table columns - NO content/metadata)
 interface DocumentElectric {
 	id: number;
@@ -17,6 +23,7 @@ interface DocumentElectric {
 	title: string;
 	created_by_id: string | null;
 	created_at: string;
+	status: DocumentStatusType | null;
 }
 
 // Document for display (with resolved user name)
@@ -28,6 +35,7 @@ export interface DocumentDisplay {
 	created_by_id: string | null;
 	created_by_name: string | null;
 	created_at: string;
+	status: DocumentStatusType;
 }
 
 /**
@@ -117,6 +125,7 @@ export function useDocuments(
 			created_by_id?: string | null;
 			created_by_name?: string | null;
 			created_at: string;
+			status?: DocumentStatusType | null;
 		}): DocumentDisplay => ({
 			id: item.id,
 			search_space_id: item.search_space_id,
@@ -125,6 +134,7 @@ export function useDocuments(
 			created_by_id: item.created_by_id ?? null,
 			created_by_name: item.created_by_name ?? null,
 			created_at: item.created_at,
+			status: item.status ?? { state: "ready" },
 		}),
 		[]
 	);
@@ -136,6 +146,7 @@ export function useDocuments(
 			created_by_name: doc.created_by_id
 				? userCacheRef.current.get(doc.created_by_id) ?? null
 				: null,
+			status: doc.status ?? { state: "ready" },
 		}),
 		[]
 	);
@@ -221,7 +232,7 @@ export function useDocuments(
 				const handle = await client.syncShape({
 					table: "documents",
 					where: `search_space_id = ${spaceId}`,
-					columns: ["id", "document_type", "search_space_id", "title", "created_by_id", "created_at"],
+					columns: ["id", "document_type", "search_space_id", "title", "created_by_id", "created_at", "status"],
 					primaryKey: ["id"],
 				});
 
@@ -259,7 +270,7 @@ export function useDocuments(
 					return;
 				}
 
-				let query = `SELECT id, document_type, search_space_id, title, created_by_id, created_at
+				let query = `SELECT id, document_type, search_space_id, title, created_by_id, created_at, status
 					FROM documents 
 					WHERE search_space_id = $1`;
 
diff --git a/surfsense_web/lib/electric/client.ts b/surfsense_web/lib/electric/client.ts
index 788a9444d..3fa4586ac 100644
--- a/surfsense_web/lib/electric/client.ts
+++ b/surfsense_web/lib/electric/client.ts
@@ -72,7 +72,9 @@ const pendingSyncs = new Map<string, Promise<SyncHandle>>();
 //     - fixed getSyncCutoffDate to use stable midnight UTC timestamps
 // v6: real-time documents table - added title and created_by_id columns for live document display
 // v7: removed use-documents-electric.ts - consolidated to single documents sync to prevent conflicts
-const SYNC_VERSION = 7;
+// v8: added status column for real-time document processing status (ready/processing/failed)
+// v9: added pending state for accurate document queue visibility (NOTE(review): the v10/v11 bumps are not described here)
+const SYNC_VERSION = 11;
 
 // Database name prefix for identifying SurfSense databases
 const DB_PREFIX = "surfsense-";
@@ -245,12 +247,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 					document_type TEXT NOT NULL,
 					title TEXT NOT NULL DEFAULT '',
 					created_by_id TEXT,
-					created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+					created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+					status JSONB DEFAULT '{"state": "ready"}'::jsonb
 				);
 				
 				CREATE INDEX IF NOT EXISTS idx_documents_search_space_id ON documents(search_space_id);
 				CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(document_type);
 				CREATE INDEX IF NOT EXISTS idx_documents_search_space_type ON documents(search_space_id, document_type);
+				CREATE INDEX IF NOT EXISTS idx_documents_status ON documents((status->>'state'));
 			`);
 
 			await db.exec(`

From 6cd3f5c1f6f89a110b092918f5a84df0a08aa4f1 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Thu, 5 Feb 2026 22:16:23 +0530
Subject: [PATCH 16/36] feat: add conflict handling for document deletion and
 selection based on processing state

---
 surfsense_backend/app/routes/notes_routes.py  |  8 +++++
 .../components/DocumentsTableShell.tsx        | 31 +++++++++++++------
 .../(manage)/components/RowActions.tsx        | 11 +++++--
 .../documents/(manage)/page.tsx               | 15 +++++++--
 4 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/surfsense_backend/app/routes/notes_routes.py b/surfsense_backend/app/routes/notes_routes.py
index 928cd462a..47cf96d04 100644
--- a/surfsense_backend/app/routes/notes_routes.py
+++ b/surfsense_backend/app/routes/notes_routes.py
@@ -230,6 +230,14 @@ async def delete_note(
     if not document:
         raise HTTPException(status_code=404, detail="Note not found")
 
+    # Check if note is pending or currently being processed
+    doc_state = document.status.get("state") if document.status else None
+    if doc_state in ("pending", "processing"):
+        raise HTTPException(
+            status_code=409,
+            detail="Cannot delete note while it is pending or being processed. Please wait for processing to complete.",
+        )
+
     # Delete document (chunks will be cascade deleted)
     await session.delete(document)
     await session.commit()
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index db77c6472..0bd8189b8 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -294,13 +294,22 @@ export function DocumentsTableShell({
 		[documents, sortKey, sortDesc]
 	);
 
-	const allSelectedOnPage = sorted.length > 0 && sorted.every((d) => selectedIds.has(d.id));
-	const someSelectedOnPage = sorted.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;
+	// Helper: a document is selectable unless it is pending/processing (a missing status counts as selectable)
+	const isSelectable = (doc: Document) => {
+		const state = doc.status?.state;
+		return state !== "pending" && state !== "processing";
+	};
+
+	// Only consider selectable documents for "select all" logic
+	const selectableDocs = sorted.filter(isSelectable);
+	const allSelectedOnPage = selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id));
+	const someSelectedOnPage = selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;
 
 	const toggleAll = (checked: boolean) => {
 		const next = new Set(selectedIds);
 		if (checked)
-			sorted.forEach((d) => {
+			// Only select documents that are not processing/pending
+			selectableDocs.forEach((d) => {
 				next.add(d.id);
 			});
 		else
@@ -547,6 +556,7 @@ export function DocumentsTableShell({
 									{sorted.map((doc, index) => {
 										const title = doc.title;
 										const isSelected = selectedIds.has(doc.id);
+										const canSelect = isSelectable(doc);
 										return (
 											<motion.tr
 												key={doc.id}
@@ -568,9 +578,10 @@ export function DocumentsTableShell({
 													<div className="flex items-center justify-center h-full">
 														<Checkbox
 															checked={isSelected}
-															onCheckedChange={(v) => toggleOne(doc.id, !!v)}
-															aria-label="Select row"
-															className="border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+															onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)}
+															disabled={!canSelect}
+															aria-label={canSelect ? "Select row" : "Cannot select while processing"}
+															className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`}
 														/>
 													</div>
 												</TableCell>
@@ -649,6 +660,7 @@ export function DocumentsTableShell({
 					<div className="md:hidden divide-y divide-border/40 h-[50vh] overflow-auto">
 						{sorted.map((doc, index) => {
 							const isSelected = selectedIds.has(doc.id);
+							const canSelect = isSelectable(doc);
 							return (
 								<motion.div
 									key={doc.id}
@@ -661,9 +673,10 @@ export function DocumentsTableShell({
 									<div className="flex items-center gap-3">
 										<Checkbox
 											checked={isSelected}
-											onCheckedChange={(v) => toggleOne(doc.id, !!v)}
-											aria-label="Select row"
-											className="border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+											onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)}
+											disabled={!canSelect}
+											aria-label={canSelect ? "Select row" : "Cannot select while processing"}
+											className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`}
 										/>
 										<div className="flex-1 min-w-0 space-y-1.5">
 											<button
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
index 96049baf5..867fdc916 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@@ -63,9 +63,16 @@ export function RowActions({
 			if (!ok) toast.error("Failed to delete document");
 			// Note: Success toast is handled by the mutation atom's onSuccess callback
 			// Cache is updated optimistically by the mutation, no need to refresh
-		} catch (error) {
+		} catch (error: unknown) {
 			console.error("Error deleting document:", error);
-			toast.error("Failed to delete document");
+			// Check for 409 Conflict (document started processing after UI loaded)
+			const status = (error as { response?: { status?: number } })?.response?.status 
+				?? (error as { status?: number })?.status;
+			if (status === 409) {
+				toast.error("Document is now being processed. Please try again later.");
+			} else {
+				toast.error("Failed to delete document");
+			}
 		} finally {
 			setIsDeleting(false);
 			setIsDeleteOpen(false);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
index babf4589d..2c515ff77 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@@ -188,20 +188,29 @@ export default function DocumentsTable() {
 
 		try {
 			// Delete documents one by one using the mutation
+			// Track 409 conflicts separately (document started processing after UI loaded)
+			let conflictCount = 0;
 			const results = await Promise.all(
 				deletableIds.map(async (id) => {
 					try {
 						await deleteDocumentMutation({ id });
 						return true;
-					} catch {
+					} catch (error: unknown) {
+						const status = (error as { response?: { status?: number } })?.response?.status 
+							?? (error as { status?: number })?.status;
+						if (status === 409) conflictCount++;
 						return false;
 					}
 				})
 			);
 			const okCount = results.filter((r) => r === true).length;
-			if (okCount === deletableIds.length)
+			if (okCount === deletableIds.length) {
 				toast.success(t("delete_success_count", { count: okCount }));
-			else toast.error(t("delete_partial_failed"));
+			} else if (conflictCount > 0) {
+				toast.error(`${conflictCount} document(s) started processing. Please try again later.`);
+			} else {
+				toast.error(t("delete_partial_failed"));
+			}
 			
 			// If in search mode, refetch search results to reflect deletion
 			if (isSearchMode) {

From 5042fbfb852773edd58c5ead37854a94c3dc661c Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Thu, 5 Feb 2026 22:59:56 +0530
Subject: [PATCH 17/36] feat: enhance Gmail and Google Drive connectors with
 document status management and duplicate content checks

---
 .../connectors/composio_gmail_connector.py    | 482 ++++++------
 .../composio_google_drive_connector.py        | 687 +++++++++++-------
 .../components/DocumentsTableShell.tsx        |   2 +-
 .../(manage)/components/RowActions.tsx        |   4 +-
 4 files changed, 708 insertions(+), 467 deletions(-)

diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py
index 05395bfba..870053c7f 100644
--- a/surfsense_backend/app/connectors/composio_gmail_connector.py
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@@ -16,11 +16,15 @@ from sqlalchemy.orm import selectinload
 
 from app.config import config
 from app.connectors.composio_connector import ComposioConnector
-from app.db import Document, DocumentType
+from app.db import Document, DocumentStatus, DocumentType
 from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
-from app.tasks.connector_indexers.base import calculate_date_range
+from app.tasks.connector_indexers.base import (
+    calculate_date_range,
+    check_duplicate_document_by_hash,
+    safe_set_chunks,
+)
 from app.utils.document_converters import (
     create_document_chunks,
     generate_content_hash,
@@ -206,26 +210,24 @@ class ComposioGmailConnector(ComposioConnector):
 # ============ Indexer Functions ============
 
 
-async def _process_gmail_message_batch(
+async def _analyze_gmail_messages_phase1(
     session: AsyncSession,
     messages: list[dict[str, Any]],
     composio_connector: ComposioGmailConnector,
     connector_id: int,
     search_space_id: int,
     user_id: str,
-    total_documents_indexed: int = 0,
-) -> tuple[int, int]:
+) -> tuple[list[dict[str, Any]], int, int]:
     """
-    Process a batch of Gmail messages and index them.
-
-    Args:
-        total_documents_indexed: Running total of documents indexed so far (for batch commits).
+    Phase 1: Analyze all messages and stage a pending document for each new one.
+    Staged documents show up in the UI with pending status once the caller commits.
 
     Returns:
-        Tuple of (documents_indexed, documents_skipped)
+        Tuple of (messages_to_process, documents_skipped, duplicate_content_count)
     """
-    documents_indexed = 0
+    messages_to_process = []
     documents_skipped = 0
+    duplicate_content_count = 0
 
     for message in messages:
         try:
@@ -235,11 +237,7 @@ async def _process_gmail_message_batch(
                 documents_skipped += 1
                 continue
 
-            # Composio's GMAIL_FETCH_EMAILS already returns full message content
-            # No need for a separate detail API call
-
             # Extract message info from Composio response
-            # Composio structure: messageId, messageText, messageTimestamp, payload.headers, labelIds
             payload = message.get("payload", {})
             headers = payload.get("headers", [])
 
@@ -262,7 +260,7 @@ async def _process_gmail_message_batch(
                 message
             )
 
-            # Check for empty content (defensive parsing per Composio best practices)
+            # Check for empty content
             if not markdown_content.strip():
                 logger.warning(f"Skipping Gmail message with no content: {subject}")
                 documents_skipped += 1
@@ -280,99 +278,51 @@ async def _process_gmail_message_batch(
                 session, unique_identifier_hash
             )
 
-            # Get label IDs from Composio response
+            # Get label IDs and thread_id from Composio response
             label_ids = message.get("labelIds", [])
-            # Extract thread_id if available (for consistency with non-Composio implementation)
             thread_id = message.get("threadId", "") or message.get("thread_id", "")
 
             if existing_document:
                 if existing_document.content_hash == content_hash:
+                    # Ensure status is ready (might have been stuck in processing/pending)
+                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        existing_document.status = DocumentStatus.ready()
                     documents_skipped += 1
                     continue
 
-                # Update existing
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "message_id": message_id,
-                        "thread_id": thread_id,
-                        "subject": subject,
-                        "sender": sender,
-                        "document_type": "Gmail Message (Composio)",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        markdown_content, user_llm, document_metadata
-                    )
-                else:
-                    summary_content = (
-                        f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
-                    )
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                chunks = await create_document_chunks(markdown_content)
-
-                existing_document.title = subject
-                existing_document.content = summary_content
-                existing_document.content_hash = content_hash
-                existing_document.embedding = summary_embedding
-                existing_document.document_metadata = {
-                    "message_id": message_id,
-                    "thread_id": thread_id,
-                    "subject": subject,
-                    "sender": sender,
-                    "date": date_str,
-                    "labels": label_ids,
-                    "connector_id": connector_id,
-                    "source": "composio",
-                }
-                existing_document.chunks = chunks
-                existing_document.updated_at = get_current_timestamp()
-
-                documents_indexed += 1
-
-                # Batch commit every 10 documents
-                current_total = total_documents_indexed + documents_indexed
-                if current_total % 10 == 0:
-                    logger.info(
-                        f"Committing batch: {current_total} Gmail messages processed so far"
-                    )
-                    await session.commit()
+                # Queue existing document for update (will be set to processing in Phase 2)
+                messages_to_process.append({
+                    'document': existing_document,
+                    'is_new': False,
+                    'markdown_content': markdown_content,
+                    'content_hash': content_hash,
+                    'message_id': message_id,
+                    'thread_id': thread_id,
+                    'subject': subject,
+                    'sender': sender,
+                    'date_str': date_str,
+                    'label_ids': label_ids,
+                })
                 continue
 
-            # Create new document
-            user_llm = await get_user_long_context_llm(
-                session, user_id, search_space_id
-            )
-
-            if user_llm:
-                document_metadata = {
-                    "message_id": message_id,
-                    "thread_id": thread_id,
-                    "subject": subject,
-                    "sender": sender,
-                    "document_type": "Gmail Message (Composio)",
-                }
-                summary_content, summary_embedding = await generate_document_summary(
-                    markdown_content, user_llm, document_metadata
-                )
-            else:
-                summary_content = (
-                    f"Gmail: {subject}\n\nFrom: {sender}\nDate: {date_str}"
-                )
-                summary_embedding = config.embedding_model_instance.embed(
-                    summary_content
+            # Document doesn't exist by unique_identifier_hash
+            # Check if a document with the same content_hash exists (from standard connector)
+            with session.no_autoflush:
+                duplicate_by_content = await check_duplicate_document_by_hash(
+                    session, content_hash
                 )
 
-            chunks = await create_document_chunks(markdown_content)
+            if duplicate_by_content:
+                logger.info(
+                    f"Message {subject} already indexed by another connector "
+                    f"(existing document ID: {duplicate_by_content.id}, "
+                    f"type: {duplicate_by_content.document_type}). Skipping."
+                )
+                duplicate_content_count += 1
+                documents_skipped += 1
+                continue
 
+            # Create new document with PENDING status (visible in UI immediately)
             document = Document(
                 search_space_id=search_space_id,
                 title=subject,
@@ -388,39 +338,138 @@ async def _process_gmail_message_batch(
                     "toolkit_id": "gmail",
                     "source": "composio",
                 },
-                content=summary_content,
-                content_hash=content_hash,
+                content="Pending...",  # Placeholder until processed
+                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                 unique_identifier_hash=unique_identifier_hash,
-                embedding=summary_embedding,
-                chunks=chunks,
+                embedding=None,
+                chunks=[],  # Empty at creation - safe for async
+                status=DocumentStatus.pending(),  # Pending until processing starts
                 updated_at=get_current_timestamp(),
                 created_by_id=user_id,
                 connector_id=connector_id,
             )
             session.add(document)
+
+            messages_to_process.append({
+                'document': document,
+                'is_new': True,
+                'markdown_content': markdown_content,
+                'content_hash': content_hash,
+                'message_id': message_id,
+                'thread_id': thread_id,
+                'subject': subject,
+                'sender': sender,
+                'date_str': date_str,
+                'label_ids': label_ids,
+            })
+
+        except Exception as e:
+            logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
+            documents_skipped += 1
+            continue
+
+    return messages_to_process, documents_skipped, duplicate_content_count
+
+
+async def _process_gmail_messages_phase2(
+    session: AsyncSession,
+    messages_to_process: list[dict[str, Any]],
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    on_heartbeat_callback: HeartbeatCallbackType | None = None,
+) -> tuple[int, int]:
+    """
+    Phase 2: Process each queued document one by one.
+    Each document transitions: pending (or its previous state, for updates) → processing → ready/failed
+
+    Returns:
+        Tuple of (documents_indexed, documents_failed)
+    """
+    documents_indexed = 0
+    documents_failed = 0
+    last_heartbeat_time = time.time()
+
+    for item in messages_to_process:
+        # Send heartbeat periodically
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        document = item['document']
+        try:
+            # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+            document.status = DocumentStatus.processing()
+            await session.commit()
+
+            # Heavy processing (LLM, embeddings, chunks)
+            user_llm = await get_user_long_context_llm(
+                session, user_id, search_space_id
+            )
+
+            if user_llm:
+                document_metadata_for_summary = {
+                    "message_id": item['message_id'],
+                    "thread_id": item['thread_id'],
+                    "subject": item['subject'],
+                    "sender": item['sender'],
+                    "document_type": "Gmail Message (Composio)",
+                }
+                summary_content, summary_embedding = await generate_document_summary(
+                    item['markdown_content'], user_llm, document_metadata_for_summary
+                )
+            else:
+                summary_content = (
+                    f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
+                )
+                summary_embedding = config.embedding_model_instance.embed(
+                    summary_content
+                )
+
+            chunks = await create_document_chunks(item['markdown_content'])
+
+            # Update document to READY with actual content
+            document.title = item['subject']
+            document.content = summary_content
+            document.content_hash = item['content_hash']
+            document.embedding = summary_embedding
+            document.document_metadata = {
+                "message_id": item['message_id'],
+                "thread_id": item['thread_id'],
+                "subject": item['subject'],
+                "sender": item['sender'],
+                "date": item['date_str'],
+                "labels": item['label_ids'],
+                "connector_id": connector_id,
+                "source": "composio",
+            }
+            safe_set_chunks(document, chunks)
+            document.updated_at = get_current_timestamp()
+            document.status = DocumentStatus.ready()
+
             documents_indexed += 1
 
-            # Batch commit every 10 documents
-            current_total = total_documents_indexed + documents_indexed
-            if current_total % 10 == 0:
+            # Batch commit every 10 documents (for ready status updates)
+            if documents_indexed % 10 == 0:
                 logger.info(
-                    f"Committing batch: {current_total} Gmail messages processed so far"
+                    f"Committing batch: {documents_indexed} Gmail messages processed so far"
                 )
                 await session.commit()
 
         except Exception as e:
             logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
-            documents_skipped += 1
-            # Rollback on error to avoid partial state (per Composio best practices)
+            # Mark document as failed with reason (visible in UI)
             try:
-                await session.rollback()
-            except Exception as rollback_error:
-                logger.error(
-                    f"Error during rollback: {rollback_error!s}", exc_info=True
-                )
+                document.status = DocumentStatus.failed(str(e))
+                document.updated_at = get_current_timestamp()
+            except Exception as status_error:
+                logger.error(f"Failed to update document status to failed: {status_error}")
+            documents_failed += 1
             continue
 
-    return documents_indexed, documents_skipped
+    return documents_indexed, documents_failed
 
 
 async def index_composio_gmail(
@@ -437,7 +486,7 @@ async def index_composio_gmail(
     max_items: int = 1000,
     on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, str]:
-    """Index Gmail messages via Composio with pagination and incremental processing."""
+    """Index Gmail messages via Composio with real-time document status updates."""
     try:
         composio_connector = ComposioGmailConnector(session, connector_id)
 
@@ -448,14 +497,10 @@ async def index_composio_gmail(
             end_date = None
 
         # Use provided dates directly if both are provided, otherwise calculate from last_indexed_at
-        # This ensures user-selected dates are respected (matching non-Composio Gmail connector behavior)
         if start_date is not None and end_date is not None:
-            # User provided both dates - use them directly
             start_date_str = start_date
             end_date_str = end_date
         else:
-            # Calculate date range with defaults (uses last_indexed_at or 365 days back)
-            # This ensures indexing works even when user doesn't specify dates
             start_date_str, end_date_str = calculate_date_range(
                 connector, start_date, end_date, default_days_back=365
             )
@@ -473,48 +518,32 @@ async def index_composio_gmail(
             f"(start_date={start_date_str}, end_date={end_date_str})"
         )
 
-        # Use smaller batch size to avoid 413 payload too large errors
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Fetching Gmail messages via Composio for connector {connector_id}",
+            {"stage": "fetching_messages"},
+        )
+
+        # =======================================================================
+        # FETCH ALL MESSAGES FIRST
+        # =======================================================================
         batch_size = 50
         page_token = None
-        total_documents_indexed = 0
-        total_documents_skipped = 0
-        total_messages_fetched = 0
-        result_size_estimate = None  # Will be set from first API response
+        all_messages = []
+        result_size_estimate = None
         last_heartbeat_time = time.time()
 
-        while total_messages_fetched < max_items:
-            # Send heartbeat periodically to indicate task is still alive
+        while len(all_messages) < max_items:
+            # Send heartbeat periodically
             if on_heartbeat_callback:
                 current_time = time.time()
                 if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
-                    await on_heartbeat_callback(total_documents_indexed)
+                    await on_heartbeat_callback(len(all_messages))
                     last_heartbeat_time = current_time
 
-            # Calculate how many messages to fetch in this batch
-            remaining = max_items - total_messages_fetched
+            remaining = max_items - len(all_messages)
             current_batch_size = min(batch_size, remaining)
 
-            # Use result_size_estimate if available, otherwise fall back to max_items
-            estimated_total = (
-                result_size_estimate if result_size_estimate is not None else max_items
-            )
-            # Cap estimated_total at max_items to avoid showing misleading progress
-            estimated_total = min(estimated_total, max_items)
-
-            await task_logger.log_task_progress(
-                log_entry,
-                f"Fetching Gmail messages batch via Composio for connector {connector_id} "
-                f"({total_messages_fetched}/{estimated_total} fetched, {total_documents_indexed} indexed)",
-                {
-                    "stage": "fetching_messages",
-                    "batch_size": current_batch_size,
-                    "total_fetched": total_messages_fetched,
-                    "total_indexed": total_documents_indexed,
-                    "estimated_total": estimated_total,
-                },
-            )
-
-            # Fetch batch of messages
             (
                 messages,
                 next_token,
@@ -533,97 +562,136 @@ async def index_composio_gmail(
                 return 0, f"Failed to fetch Gmail messages: {error}"
 
             if not messages:
-                # No more messages available
                 break
 
-            # Update result_size_estimate from first response (Gmail provides this estimate)
             if result_size_estimate is None and result_size_estimate_batch is not None:
                 result_size_estimate = result_size_estimate_batch
                 logger.info(
                     f"Gmail API estimated {result_size_estimate} total messages for query: '{query}'"
                 )
 
-            total_messages_fetched += len(messages)
-            # Recalculate estimated_total after potentially updating result_size_estimate
-            estimated_total = (
-                result_size_estimate if result_size_estimate is not None else max_items
-            )
-            estimated_total = min(estimated_total, max_items)
+            all_messages.extend(messages)
+            logger.info(f"Fetched {len(messages)} messages (total: {len(all_messages)})")
 
-            logger.info(
-                f"Fetched batch of {len(messages)} Gmail messages "
-                f"(total: {total_messages_fetched}/{estimated_total})"
-            )
-
-            # Process batch incrementally
-            batch_indexed, batch_skipped = await _process_gmail_message_batch(
-                session=session,
-                messages=messages,
-                composio_connector=composio_connector,
-                connector_id=connector_id,
-                search_space_id=search_space_id,
-                user_id=user_id,
-                total_documents_indexed=total_documents_indexed,
-            )
-
-            total_documents_indexed += batch_indexed
-            total_documents_skipped += batch_skipped
-
-            logger.info(
-                f"Processed batch: {batch_indexed} indexed, {batch_skipped} skipped "
-                f"(total: {total_documents_indexed} indexed, {total_documents_skipped} skipped)"
-            )
-
-            # Batch commits happen in _process_gmail_message_batch every 10 documents
-            # This ensures progress is saved incrementally, preventing data loss on crashes
-
-            # Check if we should continue
-            if not next_token:
-                # No more pages available
+            if not next_token or len(messages) < current_batch_size:
                 break
 
-            if len(messages) < current_batch_size:
-                # Last page had fewer items than requested, we're done
-                break
-
-            # Continue with next page
             page_token = next_token
 
-        if total_messages_fetched == 0:
+        if not all_messages:
             success_msg = "No Gmail messages found in the specified date range"
             await task_logger.log_task_success(
                 log_entry, success_msg, {"messages_count": 0}
             )
-            # CRITICAL: Update timestamp even when no messages found so Electric SQL syncs and UI shows indexed status
             await update_connector_last_indexed(session, connector, update_last_indexed)
             await session.commit()
-            return 0, None  # Return None (not error) when no items found
+            return (
+                0,
+                None,
+            )  # Return None (not error) when no items found - this is success with 0 items
 
-        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
-        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        logger.info(f"Found {len(all_messages)} Gmail messages to index via Composio")
+
+        # =======================================================================
+        # PHASE 1: Analyze all messages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Phase 1: Creating pending documents for {len(all_messages)} messages",
+            {"stage": "phase1_pending"},
+        )
+
+        (
+            messages_to_process,
+            documents_skipped,
+            duplicate_content_count,
+        ) = await _analyze_gmail_messages_phase1(
+            session=session,
+            messages=all_messages,
+            composio_connector=composio_connector,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+        )
+
+        # Commit all pending documents - they all appear in UI now
+        new_documents_count = len([m for m in messages_to_process if m['is_new']])
+        if new_documents_count > 0:
+            logger.info(f"Phase 1: Committing {new_documents_count} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Phase 2: Processing {len(messages_to_process)} documents",
+            {"stage": "phase2_processing"},
+        )
+
+        documents_indexed, documents_failed = await _process_gmail_messages_phase2(
+            session=session,
+            messages_to_process=messages_to_process,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            on_heartbeat_callback=on_heartbeat_callback,
+        )
+
+        # CRITICAL: Always update timestamp so Electric SQL syncs
         await update_connector_last_indexed(session, connector, update_last_indexed)
 
-        # Final commit to ensure all documents are persisted (safety net)
-        # This matches the pattern used in non-Composio Gmail indexer
+        # Final commit to ensure all documents are persisted
         logger.info(
-            f"Final commit: Total {total_documents_indexed} Gmail messages processed"
-        )
-        await session.commit()
-        logger.info(
-            "Successfully committed all Composio Gmail document changes to database"
+            f"Final commit: Total {documents_indexed} Gmail messages processed"
         )
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Composio Gmail document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed Gmail indexing via Composio for connector {connector_id}",
             {
-                "documents_indexed": total_documents_indexed,
-                "documents_skipped": total_documents_skipped,
-                "messages_fetched": total_messages_fetched,
+                "documents_indexed": documents_indexed,
+                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
             },
         )
 
-        return total_documents_indexed, None
+        logger.info(
+            f"Composio Gmail indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
+        )
+        return documents_indexed, warning_message
 
     except Exception as e:
         logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py
index d7299fbfe..26cfd3020 100644
--- a/surfsense_backend/app/connectors/composio_google_drive_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py
@@ -21,10 +21,14 @@ from sqlalchemy.orm.attributes import flag_modified
 
 from app.config import config
 from app.connectors.composio_connector import ComposioConnector
-from app.db import Document, DocumentType, Log
+from app.db import Document, DocumentStatus, DocumentType, Log
 from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
+from app.tasks.connector_indexers.base import (
+    check_duplicate_document_by_hash,
+    safe_set_chunks,
+)
 from app.utils.document_converters import (
     create_document_chunks,
     generate_content_hash,
@@ -537,22 +541,6 @@ async def check_document_by_unique_identifier(
     return existing_doc_result.scalars().first()
 
 
-async def check_document_by_content_hash(
-    session: AsyncSession, content_hash: str
-) -> Document | None:
-    """Check if a document with the given content hash already exists.
-
-    This is used to prevent duplicate content from being indexed, regardless
-    of which connector originally indexed it.
-    """
-    from sqlalchemy.future import select
-
-    existing_doc_result = await session.execute(
-        select(Document).where(Document.content_hash == content_hash)
-    )
-    return existing_doc_result.scalars().first()
-
-
 async def check_document_by_google_drive_file_id(
     session: AsyncSession, file_id: str, search_space_id: int
 ) -> Document | None:
@@ -843,14 +831,16 @@ async def _index_composio_drive_delta_sync(
     log_entry,
     on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, int, list[str]]:
-    """Index Google Drive files using delta sync (only changed files).
+    """Index Google Drive files using delta sync with real-time document status updates.
 
     Uses GOOGLEDRIVE_LIST_CHANGES to fetch only files that changed since last sync.
     Handles: new files, modified files, and deleted files.
     """
     documents_indexed = 0
     documents_skipped = 0
+    documents_failed = 0
     processing_errors = []
+    duplicate_content_count = 0
     last_heartbeat_time = time.time()
 
     # Fetch all changes with pagination
@@ -881,14 +871,13 @@ async def _index_composio_drive_delta_sync(
 
     logger.info(f"Processing {len(all_changes)} changes from delta sync")
 
-    for change in all_changes[:max_items]:
-        # Send heartbeat periodically to indicate task is still alive
-        if on_heartbeat_callback:
-            current_time = time.time()
-            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = current_time
+    # =======================================================================
+    # PHASE 1: Analyze all changes, handle deletions, create pending documents
+    # =======================================================================
+    files_to_process = []
+    new_documents_created = False
 
+    for change in all_changes[:max_items]:
         try:
             # Handle removed files
             is_removed = change.get("removed", False)
@@ -899,9 +888,8 @@ async def _index_composio_drive_delta_sync(
                 documents_skipped += 1
                 continue
 
-            # Check if file was trashed or removed
+            # Check if file was trashed or removed - handle deletions immediately
             if is_removed or file_info.get("trashed", False):
-                # Remove document from database
                 document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
                 unique_identifier_hash = generate_unique_identifier_hash(
                     document_type, f"drive_{file_id}", search_space_id
@@ -923,37 +911,219 @@ async def _index_composio_drive_delta_sync(
             if mime_type == "application/vnd.google-apps.folder":
                 continue
 
-            # Process the file
-            indexed, skipped, errors = await _process_single_drive_file(
-                session=session,
-                composio_connector=composio_connector,
-                file_id=file_id,
-                file_name=file_name,
-                mime_type=mime_type,
-                connector_id=connector_id,
-                search_space_id=search_space_id,
-                user_id=user_id,
-                task_logger=task_logger,
-                log_entry=log_entry,
+            # Check for existing document by file ID (from any connector)
+            existing_by_file_id = await check_document_by_google_drive_file_id(
+                session, file_id, search_space_id
             )
 
-            documents_indexed += indexed
-            documents_skipped += skipped
-            processing_errors.extend(errors)
+            # Generate unique identifier hash
+            document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
+            unique_identifier_hash = generate_unique_identifier_hash(
+                document_type, f"drive_{file_id}", search_space_id
+            )
+
+            # Check if document exists by unique identifier
+            existing_document = await check_document_by_unique_identifier(
+                session, unique_identifier_hash
+            )
+
+            if existing_by_file_id and not existing_document:
+                # File already indexed by different connector - skip
+                logger.info(
+                    f"Skipping file {file_name} (file_id={file_id}): already indexed "
+                    f"by {existing_by_file_id.document_type.value}"
+                )
+                documents_skipped += 1
+                continue
+
+            if existing_document:
+                # Queue existing document for update
+                files_to_process.append({
+                    'document': existing_document,
+                    'is_new': False,
+                    'file_id': file_id,
+                    'file_name': file_name,
+                    'mime_type': mime_type,
+                })
+                continue
+
+            # Create new document with PENDING status
+            document = Document(
+                search_space_id=search_space_id,
+                title=file_name,
+                document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
+                document_metadata={
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "FILE_NAME": file_name,
+                    "mime_type": mime_type,
+                    "connector_id": connector_id,
+                    "toolkit_id": "googledrive",
+                    "source": "composio",
+                },
+                content="Pending...",
+                content_hash=unique_identifier_hash,
+                unique_identifier_hash=unique_identifier_hash,
+                embedding=None,
+                chunks=[],
+                status=DocumentStatus.pending(),
+                updated_at=get_current_timestamp(),
+                created_by_id=user_id,
+                connector_id=connector_id,
+            )
+            session.add(document)
+            new_documents_created = True
+
+            files_to_process.append({
+                'document': document,
+                'is_new': True,
+                'file_id': file_id,
+                'file_name': file_name,
+                'mime_type': mime_type,
+            })
+
+        except Exception as e:
+            logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True)
+            documents_skipped += 1
+            continue
+
+    # Commit all pending documents - they all appear in UI now
+    if new_documents_created:
+        logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
+        await session.commit()
+
+    # =======================================================================
+    # PHASE 2: Process each document one by one
+    # =======================================================================
+    logger.info(f"Phase 2: Processing {len(files_to_process)} documents")
+
+    for item in files_to_process:
+        # Send heartbeat periodically
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        document = item['document']
+        try:
+            # Set to PROCESSING and commit
+            document.status = DocumentStatus.processing()
+            await session.commit()
+
+            # Get file content
+            content, content_error = await composio_connector.get_drive_file_content(
+                item['file_id'], original_mime_type=item['mime_type']
+            )
+
+            if content_error or not content:
+                logger.warning(f"Could not get content for file {item['file_name']}: {content_error}")
+                markdown_content = f"# {item['file_name']}\n\n"
+                markdown_content += f"**File ID:** {item['file_id']}\n"
+                markdown_content += f"**Type:** {item['mime_type']}\n"
+            elif isinstance(content, dict):
+                error_msg = f"Unexpected dict content format for file {item['file_name']}: {list(content.keys())}"
+                logger.error(error_msg)
+                processing_errors.append(error_msg)
+                markdown_content = f"# {item['file_name']}\n\n"
+                markdown_content += f"**File ID:** {item['file_id']}\n"
+                markdown_content += f"**Type:** {item['mime_type']}\n"
+            else:
+                markdown_content = await _process_file_content(
+                    content=content,
+                    file_name=item['file_name'],
+                    file_id=item['file_id'],
+                    mime_type=item['mime_type'],
+                    search_space_id=search_space_id,
+                    user_id=user_id,
+                    session=session,
+                    task_logger=task_logger,
+                    log_entry=log_entry,
+                    processing_errors=processing_errors,
+                )
+
+            content_hash = generate_content_hash(markdown_content, search_space_id)
+
+            # For existing documents, check if content changed
+            if not item['is_new'] and document.content_hash == content_hash:
+                if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
+                    document.status = DocumentStatus.ready()
+                documents_skipped += 1
+                continue
+
+            # Check for duplicate content hash (for new documents)
+            if item['is_new']:
+                with session.no_autoflush:
+                    duplicate_by_content = await check_duplicate_document_by_hash(
+                        session, content_hash
+                    )
+                if duplicate_by_content:
+                    logger.info(
+                        f"File {item['file_name']} already indexed by another connector. Skipping."
+                    )
+                    await session.delete(document)
+                    duplicate_content_count += 1
+                    documents_skipped += 1
+                    continue
+
+            # Heavy processing (LLM, embeddings, chunks)
+            user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+
+            if user_llm:
+                document_metadata_for_summary = {
+                    "file_id": item['file_id'],
+                    "file_name": item['file_name'],
+                    "mime_type": item['mime_type'],
+                    "document_type": "Google Drive File (Composio)",
+                }
+                summary_content, summary_embedding = await generate_document_summary(
+                    markdown_content, user_llm, document_metadata_for_summary
+                )
+            else:
+                summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
+                summary_embedding = config.embedding_model_instance.embed(summary_content)
+
+            chunks = await create_document_chunks(markdown_content)
+
+            # Update document to READY
+            document.title = item['file_name']
+            document.content = summary_content
+            document.content_hash = content_hash
+            document.embedding = summary_embedding
+            document.document_metadata = {
+                "file_id": item['file_id'],
+                "file_name": item['file_name'],
+                "FILE_NAME": item['file_name'],
+                "mime_type": item['mime_type'],
+                "connector_id": connector_id,
+                "source": "composio",
+            }
+            safe_set_chunks(document, chunks)
+            document.updated_at = get_current_timestamp()
+            document.status = DocumentStatus.ready()
+
+            documents_indexed += 1
 
             # Batch commit every 10 documents
-            if documents_indexed > 0 and documents_indexed % 10 == 0:
+            if documents_indexed % 10 == 0:
                 await session.commit()
                 logger.info(f"Committed batch: {documents_indexed} changes processed")
 
         except Exception as e:
-            error_msg = f"Error processing change for file {file_id}: {e!s}"
+            error_msg = f"Error processing change for file {item['file_id']}: {e!s}"
             logger.error(error_msg, exc_info=True)
             processing_errors.append(error_msg)
-            documents_skipped += 1
+            try:
+                document.status = DocumentStatus.failed(str(e))
+                document.updated_at = get_current_timestamp()
+            except Exception as status_error:
+                logger.error(f"Failed to update document status to failed: {status_error}")
+            documents_failed += 1
+            continue
 
     logger.info(
-        f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped"
+        f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped, "
+        f"{documents_failed} failed ({duplicate_content_count} duplicate content)"
     )
     return documents_indexed, documents_skipped, processing_errors
 
@@ -973,10 +1143,12 @@ async def _index_composio_drive_full_scan(
     log_entry,
     on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, int, list[str]]:
-    """Index Google Drive files using full scan (first sync or when no delta token)."""
+    """Index Google Drive files using full scan with real-time document status updates."""
     documents_indexed = 0
     documents_skipped = 0
+    documents_failed = 0
     processing_errors = []
+    duplicate_content_count = 0
     last_heartbeat_time = time.time()
 
     all_files = []
@@ -1108,14 +1280,14 @@ async def _index_composio_drive_full_scan(
         f"Found {len(all_files)} Google Drive files to index via Composio (full scan)"
     )
 
-    for file_info in all_files:
-        # Send heartbeat periodically to indicate task is still alive
-        if on_heartbeat_callback:
-            current_time = time.time()
-            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = current_time
+    # =======================================================================
+    # PHASE 1: Analyze all files, create pending documents
+    # This makes ALL documents visible in the UI immediately with pending status
+    # =======================================================================
+    files_to_process = []  # List of dicts with document and file data
+    new_documents_created = False
 
+    for file_info in all_files:
         try:
             # Handle both standard Google API and potential Composio variations
             file_id = file_info.get("id", "") or file_info.get("fileId", "")
@@ -1132,227 +1304,228 @@ async def _index_composio_drive_full_scan(
             if mime_type == "application/vnd.google-apps.folder":
                 continue
 
-            # Process the file
-            indexed, skipped, errors = await _process_single_drive_file(
-                session=session,
-                composio_connector=composio_connector,
-                file_id=file_id,
-                file_name=file_name,
-                mime_type=mime_type,
-                connector_id=connector_id,
-                search_space_id=search_space_id,
-                user_id=user_id,
-                task_logger=task_logger,
-                log_entry=log_entry,
+            # ========== EARLY DUPLICATE CHECK BY FILE ID ==========
+            existing_by_file_id = await check_document_by_google_drive_file_id(
+                session, file_id, search_space_id
+            )
+            if existing_by_file_id:
+                logger.info(
+                    f"Skipping file {file_name} (file_id={file_id}): already indexed "
+                    f"by {existing_by_file_id.document_type.value}"
+                )
+                documents_skipped += 1
+                continue
+
+            # Generate unique identifier hash
+            document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
+            unique_identifier_hash = generate_unique_identifier_hash(
+                document_type, f"drive_{file_id}", search_space_id
             )
 
-            documents_indexed += indexed
-            documents_skipped += skipped
-            processing_errors.extend(errors)
+            # Check if document exists by unique identifier
+            existing_document = await check_document_by_unique_identifier(
+                session, unique_identifier_hash
+            )
+
+            if existing_document:
+                # Queue existing document for update (will be set to processing in Phase 2)
+                files_to_process.append({
+                    'document': existing_document,
+                    'is_new': False,
+                    'file_id': file_id,
+                    'file_name': file_name,
+                    'mime_type': mime_type,
+                })
+                continue
+
+            # Create new document with PENDING status (visible in UI immediately)
+            document = Document(
+                search_space_id=search_space_id,
+                title=file_name,
+                document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
+                document_metadata={
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "FILE_NAME": file_name,
+                    "mime_type": mime_type,
+                    "connector_id": connector_id,
+                    "toolkit_id": "googledrive",
+                    "source": "composio",
+                },
+                content="Pending...",  # Placeholder until processed
+                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                unique_identifier_hash=unique_identifier_hash,
+                embedding=None,
+                chunks=[],  # Empty at creation - safe for async
+                status=DocumentStatus.pending(),  # Pending until processing starts
+                updated_at=get_current_timestamp(),
+                created_by_id=user_id,
+                connector_id=connector_id,
+            )
+            session.add(document)
+            new_documents_created = True
+
+            files_to_process.append({
+                'document': document,
+                'is_new': True,
+                'file_id': file_id,
+                'file_name': file_name,
+                'mime_type': mime_type,
+            })
+
+        except Exception as e:
+            logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True)
+            documents_skipped += 1
+            continue
+
+    # Commit all pending documents - they all appear in UI now
+    if new_documents_created:
+        logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
+        await session.commit()
+
+    # =======================================================================
+    # PHASE 2: Process each document one by one
+    # Each document transitions: pending → processing → ready/failed
+    # =======================================================================
+    logger.info(f"Phase 2: Processing {len(files_to_process)} documents")
+
+    for item in files_to_process:
+        # Send heartbeat periodically
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        document = item['document']
+        try:
+            # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+            document.status = DocumentStatus.processing()
+            await session.commit()
+
+            # Get file content (pass mime_type for Google Workspace export handling)
+            content, content_error = await composio_connector.get_drive_file_content(
+                item['file_id'], original_mime_type=item['mime_type']
+            )
+
+            if content_error or not content:
+                logger.warning(f"Could not get content for file {item['file_name']}: {content_error}")
+                markdown_content = f"# {item['file_name']}\n\n"
+                markdown_content += f"**File ID:** {item['file_id']}\n"
+                markdown_content += f"**Type:** {item['mime_type']}\n"
+            elif isinstance(content, dict):
+                error_msg = f"Unexpected dict content format for file {item['file_name']}: {list(content.keys())}"
+                logger.error(error_msg)
+                processing_errors.append(error_msg)
+                markdown_content = f"# {item['file_name']}\n\n"
+                markdown_content += f"**File ID:** {item['file_id']}\n"
+                markdown_content += f"**Type:** {item['mime_type']}\n"
+            else:
+                # Process content based on file type
+                markdown_content = await _process_file_content(
+                    content=content,
+                    file_name=item['file_name'],
+                    file_id=item['file_id'],
+                    mime_type=item['mime_type'],
+                    search_space_id=search_space_id,
+                    user_id=user_id,
+                    session=session,
+                    task_logger=task_logger,
+                    log_entry=log_entry,
+                    processing_errors=processing_errors,
+                )
+
+            content_hash = generate_content_hash(markdown_content, search_space_id)
+
+            # For existing documents, check if content changed
+            if not item['is_new'] and document.content_hash == content_hash:
+                # Ensure status is ready
+                if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
+                    document.status = DocumentStatus.ready()
+                documents_skipped += 1
+                continue
+
+            # Check for duplicate content hash (for new documents)
+            if item['is_new']:
+                with session.no_autoflush:
+                    duplicate_by_content = await check_duplicate_document_by_hash(
+                        session, content_hash
+                    )
+                if duplicate_by_content:
+                    logger.info(
+                        f"File {item['file_name']} already indexed by another connector. Skipping."
+                    )
+                    # Remove the pending document we created
+                    await session.delete(document)
+                    duplicate_content_count += 1
+                    documents_skipped += 1
+                    continue
+
+            # Heavy processing (LLM, embeddings, chunks)
+            user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+
+            if user_llm:
+                document_metadata_for_summary = {
+                    "file_id": item['file_id'],
+                    "file_name": item['file_name'],
+                    "mime_type": item['mime_type'],
+                    "document_type": "Google Drive File (Composio)",
+                }
+                summary_content, summary_embedding = await generate_document_summary(
+                    markdown_content, user_llm, document_metadata_for_summary
+                )
+            else:
+                summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
+                summary_embedding = config.embedding_model_instance.embed(summary_content)
+
+            chunks = await create_document_chunks(markdown_content)
+
+            # Update document to READY with actual content
+            document.title = item['file_name']
+            document.content = summary_content
+            document.content_hash = content_hash
+            document.embedding = summary_embedding
+            document.document_metadata = {
+                "file_id": item['file_id'],
+                "file_name": item['file_name'],
+                "FILE_NAME": item['file_name'],
+                "mime_type": item['mime_type'],
+                "connector_id": connector_id,
+                "source": "composio",
+            }
+            safe_set_chunks(document, chunks)
+            document.updated_at = get_current_timestamp()
+            document.status = DocumentStatus.ready()
+
+            documents_indexed += 1
 
             # Batch commit every 10 documents
-            if documents_indexed > 0 and documents_indexed % 10 == 0:
+            if documents_indexed % 10 == 0:
                 logger.info(
                     f"Committing batch: {documents_indexed} Google Drive files processed so far"
                 )
                 await session.commit()
 
         except Exception as e:
-            error_msg = f"Error processing Drive file {file_name or 'unknown'}: {e!s}"
+            error_msg = f"Error processing Drive file {item['file_name']}: {e!s}"
             logger.error(error_msg, exc_info=True)
             processing_errors.append(error_msg)
-            documents_skipped += 1
+            # Mark document as failed with reason (visible in UI)
+            try:
+                document.status = DocumentStatus.failed(str(e))
+                document.updated_at = get_current_timestamp()
+            except Exception as status_error:
+                logger.error(f"Failed to update document status to failed: {status_error}")
+            documents_failed += 1
+            continue
 
     logger.info(
-        f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped"
+        f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, "
+        f"{documents_failed} failed ({duplicate_content_count} duplicate content)"
     )
     return documents_indexed, documents_skipped, processing_errors
 
 
-async def _process_single_drive_file(
-    session: AsyncSession,
-    composio_connector: ComposioGoogleDriveConnector,
-    file_id: str,
-    file_name: str,
-    mime_type: str,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    task_logger: TaskLoggingService,
-    log_entry,
-) -> tuple[int, int, list[str]]:
-    """Process a single Google Drive file for indexing.
-
-    Returns:
-        Tuple of (documents_indexed, documents_skipped, processing_errors)
-    """
-    processing_errors = []
-
-    # ========== EARLY DUPLICATE CHECK BY FILE ID ==========
-    # Check if this Google Drive file was already indexed by ANY connector
-    # This happens BEFORE download/ETL to save expensive API calls
-    existing_by_file_id = await check_document_by_google_drive_file_id(
-        session, file_id, search_space_id
-    )
-    if existing_by_file_id:
-        logger.info(
-            f"Skipping file {file_name} (file_id={file_id}): already indexed "
-            f"by {existing_by_file_id.document_type.value} as '{existing_by_file_id.title}' "
-            f"(saved download & ETL cost)"
-        )
-        return 0, 1, processing_errors  # Skip - NO download, NO ETL!
-    # ======================================================
-
-    # Generate unique identifier hash
-    document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"])
-    unique_identifier_hash = generate_unique_identifier_hash(
-        document_type, f"drive_{file_id}", search_space_id
-    )
-
-    # Check if document exists by unique identifier (same connector, same file)
-    existing_document = await check_document_by_unique_identifier(
-        session, unique_identifier_hash
-    )
-
-    # Get file content (pass mime_type for Google Workspace export handling)
-    content, content_error = await composio_connector.get_drive_file_content(
-        file_id, original_mime_type=mime_type
-    )
-
-    if content_error or not content:
-        logger.warning(f"Could not get content for file {file_name}: {content_error}")
-        # Use metadata as content fallback
-        markdown_content = f"# {file_name}\n\n"
-        markdown_content += f"**File ID:** {file_id}\n"
-        markdown_content += f"**Type:** {mime_type}\n"
-    elif isinstance(content, dict):
-        # Safety check: if content is still a dict, log error and use fallback
-        error_msg = f"Unexpected dict content format for file {file_name}: {list(content.keys())}"
-        logger.error(error_msg)
-        processing_errors.append(error_msg)
-        markdown_content = f"# {file_name}\n\n"
-        markdown_content += f"**File ID:** {file_id}\n"
-        markdown_content += f"**Type:** {mime_type}\n"
-    else:
-        # Process content based on file type
-        markdown_content = await _process_file_content(
-            content=content,
-            file_name=file_name,
-            file_id=file_id,
-            mime_type=mime_type,
-            search_space_id=search_space_id,
-            user_id=user_id,
-            session=session,
-            task_logger=task_logger,
-            log_entry=log_entry,
-            processing_errors=processing_errors,
-        )
-
-    content_hash = generate_content_hash(markdown_content, search_space_id)
-
-    if existing_document:
-        if existing_document.content_hash == content_hash:
-            return 0, 1, processing_errors  # Skipped - unchanged
-
-        # Update existing document
-        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-
-        if user_llm:
-            document_metadata = {
-                "file_id": file_id,
-                "file_name": file_name,
-                "mime_type": mime_type,
-                "document_type": "Google Drive File (Composio)",
-            }
-            (
-                summary_content,
-                summary_embedding,
-            ) = await generate_document_summary(
-                markdown_content, user_llm, document_metadata
-            )
-        else:
-            summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
-            summary_embedding = config.embedding_model_instance.embed(summary_content)
-
-        chunks = await create_document_chunks(markdown_content)
-
-        existing_document.title = file_name
-        existing_document.content = summary_content
-        existing_document.content_hash = content_hash
-        existing_document.embedding = summary_embedding
-        existing_document.document_metadata = {
-            "file_id": file_id,
-            "file_name": file_name,
-            "FILE_NAME": file_name,  # For compatibility
-            "mime_type": mime_type,
-            "connector_id": connector_id,
-            "source": "composio",
-        }
-        existing_document.chunks = chunks
-        existing_document.updated_at = get_current_timestamp()
-
-        return 1, 0, processing_errors  # Indexed - updated
-
-    # Check if content_hash already exists (from any connector)
-    # This prevents duplicate content and avoids IntegrityError on unique constraint
-    existing_by_content_hash = await check_document_by_content_hash(
-        session, content_hash
-    )
-    if existing_by_content_hash:
-        logger.info(
-            f"Skipping file {file_name} (file_id={file_id}): identical content "
-            f"already indexed as '{existing_by_content_hash.title}'"
-        )
-        return 0, 1, processing_errors  # Skipped - duplicate content
-
-    # Create new document
-    user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-
-    if user_llm:
-        document_metadata = {
-            "file_id": file_id,
-            "file_name": file_name,
-            "mime_type": mime_type,
-            "document_type": "Google Drive File (Composio)",
-        }
-        (
-            summary_content,
-            summary_embedding,
-        ) = await generate_document_summary(
-            markdown_content, user_llm, document_metadata
-        )
-    else:
-        summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
-        summary_embedding = config.embedding_model_instance.embed(summary_content)
-
-    chunks = await create_document_chunks(markdown_content)
-
-    document = Document(
-        search_space_id=search_space_id,
-        title=file_name,
-        document_type=DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]),
-        document_metadata={
-            "file_id": file_id,
-            "file_name": file_name,
-            "FILE_NAME": file_name,  # For compatibility
-            "mime_type": mime_type,
-            "toolkit_id": "googledrive",
-            "source": "composio",
-        },
-        content=summary_content,
-        content_hash=content_hash,
-        unique_identifier_hash=unique_identifier_hash,
-        embedding=summary_embedding,
-        chunks=chunks,
-        updated_at=get_current_timestamp(),
-        created_by_id=user_id,
-        connector_id=connector_id,
-    )
-    session.add(document)
-
-    return 1, 0, processing_errors  # Indexed - new
-
-
 async def _fetch_folder_files_recursively(
     composio_connector: ComposioGoogleDriveConnector,
     folder_id: str,
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index 0bd8189b8..d579fe677 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -56,7 +56,7 @@ function StatusIndicator({ status }: { status?: DocumentStatus }) {
 							<Spinner size="sm" className="text-primary" />
 						</div>
 					</TooltipTrigger>
-					<TooltipContent side="top">Processing...</TooltipContent>
+					<TooltipContent side="top">Syncing</TooltipContent>
 				</Tooltip>
 			);
 		case "failed":
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
index 867fdc916..4133f2960 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@@ -119,7 +119,7 @@ export function RowActions({
 						<Button
 							variant="ghost"
 							size="icon"
-							className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground/50 cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
+							className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
 							onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
 							disabled={isDeleting || isDeleteDisabled}
 						>
@@ -164,7 +164,7 @@ export function RowActions({
 						<Button
 							variant="ghost"
 							size="icon"
-							className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground/50 cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
+							className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
 							onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
 							disabled={isDeleting || isDeleteDisabled}
 						>

From 3bbac0d4ea4cb20c15f456eea963197034c9919c Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 00:12:48 +0530
Subject: [PATCH 18/36] feat: implement two-phase document indexing for
 Airtable and Notion connectors with real-time status updates

---
 .../connector_indexers/airtable_indexer.py    | 367 ++++++++--------
 .../connector_indexers/notion_indexer.py      | 402 ++++++++++--------
 2 files changed, 416 insertions(+), 353 deletions(-)

diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
index 54b1afd26..05a4007ae 100644
--- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
@@ -1,5 +1,9 @@
 """
 Airtable connector indexer.
+
+Implements real-time document status updates using a two-phase approach:
+- Phase 1: Create all documents with PENDING status (visible in UI immediately)
+- Phase 2: Process each document one by one (pending → processing → ready/failed)
 """
 
 import time
@@ -10,7 +14,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.airtable_history import AirtableHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -27,6 +31,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -134,24 +139,30 @@ async def index_airtable_records(
                 await task_logger.log_task_success(
                     log_entry, success_msg, {"bases_count": 0}
                 )
-                return 0, success_msg
+                # CRITICAL: Update timestamp even when no bases found so Electric SQL syncs
+                await update_connector_last_indexed(session, connector, update_last_indexed)
+                await session.commit()
+                return 0, None  # Return None (not error) when no items found
 
             logger.info(f"Found {len(bases)} Airtable bases to process")
 
             # Heartbeat tracking - update notification periodically to prevent appearing stuck
             last_heartbeat_time = time.time()
-            total_documents_indexed = 0
 
-            # Process each base
+            # Track overall statistics
+            documents_indexed = 0
+            documents_skipped = 0
+            documents_failed = 0
+            duplicate_content_count = 0
+
+            # =======================================================================
+            # PHASE 1: Collect all records and create pending documents
+            # This makes ALL documents visible in the UI immediately with pending status
+            # =======================================================================
+            records_to_process = []  # List of dicts with document and record data
+            new_documents_created = False
+
             for base in bases:
-                # Check if it's time for a heartbeat update
-                if (
-                    on_heartbeat_callback
-                    and (time.time() - last_heartbeat_time)
-                    >= HEARTBEAT_INTERVAL_SECONDS
-                ):
-                    await on_heartbeat_callback(total_documents_indexed)
-                    last_heartbeat_time = time.time()
                 base_id = base.get("id")
                 base_name = base.get("name", "Unknown Base")
 
@@ -201,7 +212,6 @@ async def index_airtable_records(
                                 max_records=max_records,
                             )
                         )
-
                     else:
                         # Fetch all records
                         records, records_error = airtable_connector.get_all_records(
@@ -222,21 +232,14 @@ async def index_airtable_records(
 
                     logger.info(f"Found {len(records)} records in table {table_name}")
 
-                    documents_indexed = 0
-                    skipped_messages = []
-                    documents_skipped = 0
-                    # Process each record
+                    # Phase 1: Analyze each record and create pending documents
                     for record in records:
-                        # Check if it's time for a heartbeat update
-                        if (
-                            on_heartbeat_callback
-                            and (time.time() - last_heartbeat_time)
-                            >= HEARTBEAT_INTERVAL_SECONDS
-                        ):
-                            await on_heartbeat_callback(total_documents_indexed)
-                            last_heartbeat_time = time.time()
-
                         try:
+                            record_id = record.get("id", "")
+                            if not record_id:
+                                documents_skipped += 1
+                                continue
+
                             # Generate markdown content
                             markdown_content = (
                                 airtable_connector.format_record_to_markdown(
@@ -246,16 +249,11 @@ async def index_airtable_records(
 
                             if not markdown_content.strip():
                                 logger.warning(
-                                    f"Skipping message with no content: {record.get('id')}"
-                                )
-                                skipped_messages.append(
-                                    f"{record.get('id')} (no content)"
+                                    f"Skipping record with no content: {record_id}"
                                 )
                                 documents_skipped += 1
                                 continue
 
-                            record_id = record.get("id", "Unknown")
-
                             # Generate unique identifier hash for this Airtable record
                             unique_identifier_hash = generate_unique_identifier_hash(
                                 DocumentType.AIRTABLE_CONNECTOR,
@@ -278,75 +276,24 @@ async def index_airtable_records(
                             if existing_document:
                                 # Document exists - check if content has changed
                                 if existing_document.content_hash == content_hash:
-                                    logger.info(
-                                        f"Document for Airtable record {record_id} unchanged. Skipping."
-                                    )
+                                    # Ensure status is ready (might have been stuck in processing/pending)
+                                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                                        existing_document.status = DocumentStatus.ready()
                                     documents_skipped += 1
                                     continue
-                                else:
-                                    # Content has changed - update the existing document
-                                    logger.info(
-                                        f"Content changed for Airtable record {record_id}. Updating document."
-                                    )
 
-                                    # Generate document summary
-                                    user_llm = await get_user_long_context_llm(
-                                        session, user_id, search_space_id
-                                    )
-
-                                    if user_llm:
-                                        document_metadata = {
-                                            "record_id": record_id,
-                                            "created_time": record.get(
-                                                "CREATED_TIME()", ""
-                                            ),
-                                            "document_type": "Airtable Record",
-                                            "connector_type": "Airtable",
-                                        }
-                                        (
-                                            summary_content,
-                                            summary_embedding,
-                                        ) = await generate_document_summary(
-                                            markdown_content,
-                                            user_llm,
-                                            document_metadata,
-                                        )
-                                    else:
-                                        summary_content = (
-                                            f"Airtable Record: {record_id}\n\n"
-                                        )
-                                        summary_embedding = (
-                                            config.embedding_model_instance.embed(
-                                                summary_content
-                                            )
-                                        )
-
-                                    # Process chunks
-                                    chunks = await create_document_chunks(
-                                        markdown_content
-                                    )
-
-                                    # Update existing document
-                                    existing_document.title = record_id
-                                    existing_document.content = summary_content
-                                    existing_document.content_hash = content_hash
-                                    existing_document.embedding = summary_embedding
-                                    existing_document.document_metadata = {
-                                        "record_id": record_id,
-                                        "created_time": record.get(
-                                            "CREATED_TIME()", ""
-                                        ),
-                                    }
-                                    existing_document.chunks = chunks
-                                    existing_document.updated_at = (
-                                        get_current_timestamp()
-                                    )
-
-                                    documents_indexed += 1
-                                    logger.info(
-                                        f"Successfully updated Airtable record {record_id}"
-                                    )
-                                    continue
+                                # Queue existing document for update (will be set to processing in Phase 2)
+                                records_to_process.append({
+                                    'document': existing_document,
+                                    'is_new': False,
+                                    'markdown_content': markdown_content,
+                                    'content_hash': content_hash,
+                                    'record_id': record_id,
+                                    'record': record,
+                                    'base_name': base_name,
+                                    'table_name': table_name,
+                                })
+                                continue
 
                             # Document doesn't exist by unique_identifier_hash
                             # Check if a document with the same content_hash exists (from another connector)
@@ -363,44 +310,11 @@ async def index_airtable_records(
                                     f"(existing document ID: {duplicate_by_content.id}, "
                                     f"type: {duplicate_by_content.document_type}). Skipping."
                                 )
+                                duplicate_content_count += 1
                                 documents_skipped += 1
                                 continue
 
-                            # Document doesn't exist - create new one
-                            # Generate document summary
-                            user_llm = await get_user_long_context_llm(
-                                session, user_id, search_space_id
-                            )
-
-                            if user_llm:
-                                document_metadata = {
-                                    "record_id": record_id,
-                                    "created_time": record.get("CREATED_TIME()", ""),
-                                    "document_type": "Airtable Record",
-                                    "connector_type": "Airtable",
-                                }
-                                (
-                                    summary_content,
-                                    summary_embedding,
-                                ) = await generate_document_summary(
-                                    markdown_content, user_llm, document_metadata
-                                )
-                            else:
-                                # Fallback to simple summary if no LLM configured
-                                summary_content = f"Airtable Record: {record_id}\n\n"
-                                summary_embedding = (
-                                    config.embedding_model_instance.embed(
-                                        summary_content
-                                    )
-                                )
-
-                            # Process chunks
-                            chunks = await create_document_chunks(markdown_content)
-
-                            # Create and store new document
-                            logger.info(
-                                f"Creating new document for Airtable record: {record_id}"
-                            )
+                            # Create new document with PENDING status (visible in UI immediately)
                             document = Document(
                                 search_space_id=search_space_id,
                                 title=record_id,
@@ -408,78 +322,181 @@ async def index_airtable_records(
                                 document_metadata={
                                     "record_id": record_id,
                                     "created_time": record.get("CREATED_TIME()", ""),
+                                    "base_name": base_name,
+                                    "table_name": table_name,
+                                    "connector_id": connector_id,
                                 },
-                                content=summary_content,
-                                content_hash=content_hash,
+                                content="Pending...",  # Placeholder until processed
+                                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                                 unique_identifier_hash=unique_identifier_hash,
-                                embedding=summary_embedding,
-                                chunks=chunks,
+                                embedding=None,
+                                chunks=[],  # Empty at creation - safe for async
+                                status=DocumentStatus.pending(),  # Pending until processing starts
                                 updated_at=get_current_timestamp(),
                                 created_by_id=user_id,
                                 connector_id=connector_id,
                             )
-
                             session.add(document)
-                            documents_indexed += 1
-                            logger.info(
-                                f"Successfully indexed new Airtable record {summary_content}"
-                            )
+                            new_documents_created = True
 
-                            # Batch commit every 10 documents
-                            if documents_indexed % 10 == 0:
-                                logger.info(
-                                    f"Committing batch: {documents_indexed} Airtable records processed so far"
-                                )
-                                await session.commit()
+                            records_to_process.append({
+                                'document': document,
+                                'is_new': True,
+                                'markdown_content': markdown_content,
+                                'content_hash': content_hash,
+                                'record_id': record_id,
+                                'record': record,
+                                'base_name': base_name,
+                                'table_name': table_name,
+                            })
 
                         except Exception as e:
-                            logger.error(
-                                f"Error processing the Airtable record {record.get('id', 'Unknown')}: {e!s}",
-                                exc_info=True,
-                            )
-                            skipped_messages.append(
-                                f"{record.get('id', 'Unknown')} (processing error)"
-                            )
-                            documents_skipped += 1
-                            continue  # Skip this message and continue with others
+                            logger.error(f"Error in Phase 1 for record: {e!s}", exc_info=True)
+                            documents_failed += 1
+                            continue
 
-                    # Accumulate total processed across all tables
-                    total_processed += documents_indexed
+            # Commit all pending documents - they all appear in UI now
+            if new_documents_created:
+                logger.info(f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents")
+                await session.commit()
 
-                    # Final commit for any remaining documents not yet committed in batches
-                    if documents_indexed > 0:
+            # =======================================================================
+            # PHASE 2: Process each document one by one
+            # Each document transitions: pending → processing → ready/failed
+            # =======================================================================
+            logger.info(f"Phase 2: Processing {len(records_to_process)} documents")
+
+            for item in records_to_process:
+                # Send heartbeat periodically
+                if on_heartbeat_callback:
+                    current_time = time.time()
+                    if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                        await on_heartbeat_callback(documents_indexed)
+                        last_heartbeat_time = current_time
+
+                document = item['document']
+                try:
+                    # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                    document.status = DocumentStatus.processing()
+                    await session.commit()
+
+                    # Heavy processing (LLM, embeddings, chunks)
+                    user_llm = await get_user_long_context_llm(
+                        session, user_id, search_space_id
+                    )
+
+                    if user_llm:
+                        document_metadata_for_summary = {
+                            "record_id": item['record_id'],
+                            "created_time": item['record'].get("CREATED_TIME()", ""),
+                            "document_type": "Airtable Record",
+                            "connector_type": "Airtable",
+                        }
+                        summary_content, summary_embedding = await generate_document_summary(
+                            item['markdown_content'], user_llm, document_metadata_for_summary
+                        )
+                    else:
+                        # Fallback to simple summary if no LLM configured
+                        summary_content = f"Airtable Record: {item['record_id']}\n\n"
+                        summary_embedding = config.embedding_model_instance.embed(
+                            summary_content
+                        )
+
+                    chunks = await create_document_chunks(item['markdown_content'])
+
+                    # Update document to READY with actual content
+                    document.title = item['record_id']
+                    document.content = summary_content
+                    document.content_hash = item['content_hash']
+                    document.embedding = summary_embedding
+                    document.document_metadata = {
+                        "record_id": item['record_id'],
+                        "created_time": item['record'].get("CREATED_TIME()", ""),
+                        "base_name": item['base_name'],
+                        "table_name": item['table_name'],
+                        "connector_id": connector_id,
+                    }
+                    safe_set_chunks(document, chunks)
+                    document.updated_at = get_current_timestamp()
+                    document.status = DocumentStatus.ready()
+
+                    documents_indexed += 1
+
+                    # Batch commit every 10 documents (for ready status updates)
+                    if documents_indexed % 10 == 0:
                         logger.info(
-                            f"Final commit for table {table_name}: {documents_indexed} Airtable records processed"
+                            f"Committing batch: {documents_indexed} Airtable records processed so far"
                         )
                         await session.commit()
-                        logger.info(
-                            f"Successfully committed all Airtable document changes for table {table_name}"
-                        )
 
-            # Update the last_indexed_at timestamp for the connector only if requested
-            # (after all tables in all bases are processed)
-            if total_processed > 0:
-                await update_connector_last_indexed(
-                    session, connector, update_last_indexed
+                except Exception as e:
+                    logger.error(f"Error processing Airtable record: {e!s}", exc_info=True)
+                    # Mark document as failed with reason (shown in UI once the next commit persists it)
+                    try:
+                        document.status = DocumentStatus.failed(str(e))
+                        document.updated_at = get_current_timestamp()
+                    except Exception as status_error:
+                        logger.error(f"Failed to update document status to failed: {status_error}")
+                    documents_failed += 1
+                    continue
+
+            # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+
+            total_processed = documents_indexed
+
+            # Final commit: persists status changes made since the last commit and the connector timestamp
+            logger.info(f"Final commit: Total {documents_indexed} Airtable records processed")
+            try:
+                await session.commit()
+                logger.info(
+                    "Successfully committed all Airtable document changes to database"
                 )
+            except Exception as e:
+                # Handle any remaining integrity errors gracefully (race conditions, etc.)
+                if (
+                    "duplicate key value violates unique constraint" in str(e).lower()
+                    or "uniqueviolationerror" in str(e).lower()
+                ):
+                    logger.warning(
+                        f"Duplicate content_hash detected during final commit. "
+                        f"This may occur if the same record was indexed by multiple connectors. "
+                        f"Rolling back and continuing. Error: {e!s}"
+                    )
+                    await session.rollback()
+                    # Don't fail the entire task - some documents may have been successfully indexed
+                else:
+                    raise
+
+            # Build warning message if there were issues
+            warning_parts = []
+            if duplicate_content_count > 0:
+                warning_parts.append(f"{duplicate_content_count} duplicate")
+            if documents_failed > 0:
+                warning_parts.append(f"{documents_failed} failed")
+            warning_message = ", ".join(warning_parts) if warning_parts else None
 
             # Log success after processing all bases and tables
             await task_logger.log_task_success(
                 log_entry,
                 f"Successfully completed Airtable indexing for connector {connector_id}",
                 {
-                    "events_processed": total_processed,
-                    "documents_indexed": total_processed,
+                    "documents_indexed": documents_indexed,
+                    "documents_skipped": documents_skipped,
+                    "documents_failed": documents_failed,
+                    "duplicate_content_count": duplicate_content_count,
                 },
             )
 
             logger.info(
-                f"Airtable indexing completed: {total_processed} total records processed"
+                f"Airtable indexing completed: {documents_indexed} ready, "
+                f"{documents_skipped} skipped, {documents_failed} failed "
+                f"({duplicate_content_count} duplicate content)"
             )
             return (
                 total_processed,
-                None,
-            )  # Return None as the error message to indicate success
+                warning_message,
+            )
 
         except Exception as e:
             logger.error(
diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
index 8d4d7650a..37927b779 100644
--- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
@@ -1,5 +1,9 @@
 """
 Notion connector indexer.
+
+Implements real-time document status updates using a two-phase approach:
+- Phase 1: Create all documents with PENDING status (visible in UI immediately)
+- Phase 2: Process each document one by one (pending → processing → ready/failed)
 """
 
 import time
@@ -9,8 +13,9 @@ from datetime import datetime
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 
+from app.config import config
 from app.connectors.notion_history import NotionHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -28,6 +33,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -214,12 +220,17 @@ async def index_notion_pages(
                 {"pages_found": 0},
             )
             logger.info("No Notion pages found to index")
+            # CRITICAL: Update timestamp even when no pages found so Electric SQL syncs
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
             await notion_client.close()
             return 0, None  # Success with 0 pages, not an error
 
         # Track the number of documents indexed
         documents_indexed = 0
         documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0
         skipped_pages = []
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
@@ -231,22 +242,69 @@ async def index_notion_pages(
             {"stage": "process_pages", "total_pages": len(pages)},
         )
 
-        # Process each page
-        for page in pages:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
+        # =======================================================================
+        # PHASE 1: Analyze all pages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        pages_to_process = []  # List of dicts with document and page data
+        new_documents_created = False
 
+        # Helper: recursively render a list of page blocks (and their children) as markdown
+        def process_blocks(blocks, level=0):
+            result = ""
+            for block in blocks:
+                block_type = block.get("type")
+                block_content = block.get("content", "")
+                children = block.get("children", [])
+
+                # Two spaces of indentation per nesting level
+                indent = "  " * level
+
+                # Map each block type to its markdown form; only the first line is indented
+                if block_type in ["paragraph", "text"]:
+                    result += f"{indent}{block_content}\n\n"
+                elif block_type in ["heading_1", "header"]:
+                    result += f"{indent}# {block_content}\n\n"
+                elif block_type == "heading_2":
+                    result += f"{indent}## {block_content}\n\n"
+                elif block_type == "heading_3":
+                    result += f"{indent}### {block_content}\n\n"
+                elif block_type == "bulleted_list_item":
+                    result += f"{indent}* {block_content}\n"
+                elif block_type == "numbered_list_item":
+                    result += f"{indent}1. {block_content}\n"
+                elif block_type == "to_do":
+                    result += f"{indent}- [ ] {block_content}\n"
+                elif block_type == "toggle":
+                    result += f"{indent}> {block_content}\n"
+                elif block_type == "code":
+                    result += f"{indent}```\n{block_content}\n```\n\n"
+                elif block_type == "quote":
+                    result += f"{indent}> {block_content}\n\n"
+                elif block_type == "callout":
+                    result += f"{indent}> **Note:** {block_content}\n\n"
+                elif block_type == "image":
+                    result += f"{indent}![Image]({block_content})\n\n"
+                else:
+                    # Unrecognized block types: emit content as plain text, skip when empty
+                    if block_content:
+                        result += f"{indent}{block_content}\n\n"
+
+                # Append nested blocks, rendered one level deeper
+                if children:
+                    result += process_blocks(children, level + 1)
+
+            return result
+
+        for page in pages:
             try:
                 page_id = page.get("page_id")
                 page_title = page.get("title", f"Untitled page ({page_id})")
                 page_content = page.get("content", [])
 
-                logger.info(f"Processing Notion page: {page_title} ({page_id})")
+                if not page_id:
+                    documents_skipped += 1
+                    continue
 
                 if not page_content:
                     logger.info(f"No content found in page {page_title}. Skipping.")
@@ -256,57 +314,6 @@ async def index_notion_pages(
 
                 # Convert page content to markdown format
                 markdown_content = f"# Notion Page: {page_title}\n\n"
-
-                # Process blocks recursively
-                def process_blocks(blocks, level=0):
-                    result = ""
-                    for block in blocks:
-                        block_type = block.get("type")
-                        block_content = block.get("content", "")
-                        children = block.get("children", [])
-
-                        # Add indentation based on level
-                        indent = "  " * level
-
-                        # Format based on block type
-                        if block_type in ["paragraph", "text"]:
-                            result += f"{indent}{block_content}\n\n"
-                        elif block_type in ["heading_1", "header"]:
-                            result += f"{indent}# {block_content}\n\n"
-                        elif block_type == "heading_2":
-                            result += f"{indent}## {block_content}\n\n"
-                        elif block_type == "heading_3":
-                            result += f"{indent}### {block_content}\n\n"
-                        elif block_type == "bulleted_list_item":
-                            result += f"{indent}* {block_content}\n"
-                        elif block_type == "numbered_list_item":
-                            result += f"{indent}1. {block_content}\n"
-                        elif block_type == "to_do":
-                            result += f"{indent}- [ ] {block_content}\n"
-                        elif block_type == "toggle":
-                            result += f"{indent}> {block_content}\n"
-                        elif block_type == "code":
-                            result += f"{indent}```\n{block_content}\n```\n\n"
-                        elif block_type == "quote":
-                            result += f"{indent}> {block_content}\n\n"
-                        elif block_type == "callout":
-                            result += f"{indent}> **Note:** {block_content}\n\n"
-                        elif block_type == "image":
-                            result += f"{indent}![Image]({block_content})\n\n"
-                        else:
-                            # Default for other block types
-                            if block_content:
-                                result += f"{indent}{block_content}\n\n"
-
-                        # Process children recursively
-                        if children:
-                            result += process_blocks(children, level + 1)
-
-                    return result
-
-                logger.debug(
-                    f"Converting {len(page_content)} blocks to markdown for page {page_title}"
-                )
                 markdown_content += process_blocks(page_content)
 
                 # Format document metadata
@@ -346,71 +353,22 @@ async def index_notion_pages(
                 if existing_document:
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Notion page {page_title} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Notion page {page_title}. Updating document."
-                        )
 
-                        # Get user's long context LLM
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-                        if not user_llm:
-                            logger.error(
-                                f"No long context LLM configured for user {user_id}"
-                            )
-                            skipped_pages.append(f"{page_title} (no LLM configured)")
-                            documents_skipped += 1
-                            continue
-
-                        # Generate summary with metadata
-                        document_metadata = {
-                            "page_title": page_title,
-                            "page_id": page_id,
-                            "document_type": "Notion Page",
-                            "connector_type": "Notion",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            markdown_content, user_llm, document_metadata
-                        )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(markdown_content)
-
-                        # Update existing document
-                        existing_document.title = page_title
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
-                            "page_title": page_title,
-                            "page_id": page_id,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-                        existing_document.connector_id = connector_id
-
-                        documents_indexed += 1
-                        logger.info(f"Successfully updated Notion page: {page_title}")
-
-                        # Batch commit every 10 documents
-                        if documents_indexed % 10 == 0:
-                            logger.info(
-                                f"Committing batch: {documents_indexed} documents processed so far"
-                            )
-                            await session.commit()
-
-                        continue
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    pages_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'markdown_content': markdown_content,
+                        'content_hash': content_hash,
+                        'page_id': page_id,
+                        'page_title': page_title,
+                    })
+                    continue
 
                 # Document doesn't exist by unique_identifier_hash
                 # Check if a document with the same content_hash exists (from another connector)
@@ -425,37 +383,11 @@ async def index_notion_pages(
                         f"(existing document ID: {duplicate_by_content.id}, "
                         f"type: {duplicate_by_content.document_type}). Skipping."
                     )
+                    duplicate_content_count += 1
                     documents_skipped += 1
                     continue
 
-                # Document doesn't exist - create new one
-                # Get user's long context LLM
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-                if not user_llm:
-                    logger.error(f"No long context LLM configured for user {user_id}")
-                    skipped_pages.append(f"{page_title} (no LLM configured)")
-                    documents_skipped += 1
-                    continue
-
-                # Generate summary with metadata
-                logger.debug(f"Generating summary for page {page_title}")
-                document_metadata = {
-                    "page_title": page_title,
-                    "page_id": page_id,
-                    "document_type": "Notion Page",
-                    "connector_type": "Notion",
-                }
-                summary_content, summary_embedding = await generate_document_summary(
-                    markdown_content, user_llm, document_metadata
-                )
-
-                # Process chunks
-                logger.debug(f"Chunking content for page {page_title}")
-                chunks = await create_document_chunks(markdown_content)
-
-                # Create and store new document
+                # Create new document with PENDING status (visible in UI immediately)
                 document = Document(
                     search_space_id=search_space_id,
                     title=page_title,
@@ -463,53 +395,159 @@ async def index_notion_pages(
                     document_metadata={
                         "page_title": page_title,
                         "page_id": page_id,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                     },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                     unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                     updated_at=get_current_timestamp(),
                     created_by_id=user_id,
                     connector_id=connector_id,
                 )
-
                 session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new Notion page: {page_title}")
+                new_documents_created = True
 
-                # Batch commit every 10 documents
+                pages_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'markdown_content': markdown_content,
+                    'content_hash': content_hash,
+                    'page_id': page_id,
+                    'page_title': page_title,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(pages_to_process)} documents")
+
+        for item in pages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "page_title": item['page_title'],
+                        "page_id": item['page_id'],
+                        "document_type": "Notion Page",
+                        "connector_type": "Notion",
+                    }
+                    summary_content, summary_embedding = await generate_document_summary(
+                        item['markdown_content'], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    summary_content = f"Notion Page: {item['page_title']}\n\n{item['markdown_content'][:500]}..."
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item['markdown_content'])
+
+                # Update document to READY with actual content
+                document.title = item['page_title']
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "page_title": item['page_title'],
+                    "page_id": item['page_id'],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                 if documents_indexed % 10 == 0:
                     logger.info(
-                        f"Committing batch: {documents_indexed} documents processed so far"
+                        f"Committing batch: {documents_indexed} Notion pages processed so far"
                     )
                     await session.commit()
 
             except Exception as e:
-                logger.error(
-                    f"Error processing Notion page {page.get('title', 'Unknown')}: {e!s}",
-                    exc_info=True,
-                )
-                skipped_pages.append(
-                    f"{page.get('title', 'Unknown')} (processing error)"
-                )
-                documents_skipped += 1
-                continue  # Skip this page and continue with others
+                logger.error(f"Error processing Notion page: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                skipped_pages.append(f"{item['page_title']} (processing error)")
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
-        # Update the last_indexed_at timestamp for the connector only if requested
-        # and if we successfully indexed at least one page
         total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
 
-        # Final commit for any remaining documents not yet committed in batches
+        # Final commit to ensure all documents are persisted (safety net)
         logger.info(f"Final commit: Total {documents_indexed} documents processed")
-        await session.commit()
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Notion document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same page was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
 
         # Get final count of pages with skipped Notion AI content
         pages_with_skipped_ai_content = notion_client.get_skipped_content_count()
 
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
+
         # Prepare result message with user-friendly notification about skipped content
         result_message = None
         if skipped_pages:
@@ -532,6 +570,8 @@ async def index_notion_pages(
                 "pages_processed": total_processed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
                 "skipped_pages_count": len(skipped_pages),
                 "pages_with_skipped_ai_content": pages_with_skipped_ai_content,
                 "result_message": result_message,
@@ -539,7 +579,9 @@ async def index_notion_pages(
         )
 
         logger.info(
-            f"Notion indexing completed: {documents_indexed} new pages, {documents_skipped} skipped"
+            f"Notion indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
         )
 
         # Clean up the async client
@@ -559,6 +601,10 @@ async def index_notion_pages(
                 "Using legacy token. Reconnect with OAuth for better reliability."
             )
 
+        # Include warning message if there were issues
+        if warning_message:
+            notification_parts.append(warning_message)
+
         user_notification_message = (
             " ".join(notification_parts) if notification_parts else None
         )

From c12401c1e87ec1244103e4318363b0435f615911 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 02:24:35 +0530
Subject: [PATCH 19/36] feat: implement two-phase document indexing across
 Google connectors with real-time status updates

---
 .../routes/search_source_connectors_routes.py |   2 +
 .../google_calendar_indexer.py                | 322 +++++++--------
 .../google_drive_indexer.py                   | 365 +++++++++++++++---
 .../google_gmail_indexer.py                   | 311 ++++++++-------
 .../document_processors/file_processors.py    |   8 +-
 .../document_processors/markdown_processor.py |   4 +-
 6 files changed, 681 insertions(+), 331 deletions(-)

diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 70e8f28f9..747e02834 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -2127,6 +2127,7 @@ async def run_google_gmail_indexing(
         start_date: str | None,
         end_date: str | None,
         update_last_indexed: bool,
+        on_heartbeat_callback=None,
     ) -> tuple[int, str | None]:
         # Use a reasonable default for max_messages
         max_messages = 1000
@@ -2139,6 +2140,7 @@ async def run_google_gmail_indexing(
             end_date=end_date,
             update_last_indexed=update_last_indexed,
             max_messages=max_messages,
+            on_heartbeat_callback=on_heartbeat_callback,
         )
         # index_google_gmail_messages returns (int, str) but we need (int, str | None)
         return indexed_count, error_message if error_message else None
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index 8d7b8b045..ad749e61c 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -1,5 +1,9 @@
 """
 Google Calendar connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import time
@@ -11,7 +15,7 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.connectors.google_calendar_connector import GoogleCalendarConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -27,6 +31,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -284,7 +289,7 @@ async def index_google_calendar_events(
 
         documents_indexed = 0
         documents_skipped = 0
-        skipped_events = []
+        documents_failed = 0  # Track events that failed processing
         duplicate_content_count = (
             0  # Track events skipped due to duplicate content_hash
         )
@@ -292,14 +297,14 @@ async def index_google_calendar_events(
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
+        # =======================================================================
+        # PHASE 1: Analyze all events, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        events_to_process = []  # List of dicts with document and event data
+        new_documents_created = False
+
         for event in events:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
             try:
                 event_id = event.get("id")
                 event_summary = event.get("summary", "No Title")
@@ -307,14 +312,12 @@ async def index_google_calendar_events(
 
                 if not event_id:
                     logger.warning(f"Skipping event with missing ID: {event_summary}")
-                    skipped_events.append(f"{event_summary} (missing ID)")
                     documents_skipped += 1
                     continue
 
                 event_markdown = calendar_client.format_event_to_markdown(event)
                 if not event_markdown.strip():
                     logger.warning(f"Skipping event with no content: {event_summary}")
-                    skipped_events.append(f"{event_summary} (no content)")
                     documents_skipped += 1
                     continue
 
@@ -341,82 +344,27 @@ async def index_google_calendar_events(
                 if existing_document:
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Google Calendar event {event_summary} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Google Calendar event {event_summary}. Updating document."
-                        )
 
-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "event_id": event_id,
-                                "event_summary": event_summary,
-                                "calendar_id": calendar_id,
-                                "start_time": start_time,
-                                "end_time": end_time,
-                                "location": location or "No location",
-                                "document_type": "Google Calendar Event",
-                                "connector_type": "Google Calendar",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                event_markdown, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = (
-                                f"Google Calendar Event: {event_summary}\n\n"
-                            )
-                            summary_content += f"Calendar: {calendar_id}\n"
-                            summary_content += f"Start: {start_time}\n"
-                            summary_content += f"End: {end_time}\n"
-                            if location:
-                                summary_content += f"Location: {location}\n"
-                            if description:
-                                desc_preview = description[:1000]
-                                if len(description) > 1000:
-                                    desc_preview += "..."
-                                summary_content += f"Description: {desc_preview}\n"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(event_markdown)
-
-                        # Update existing document
-                        existing_document.title = event_summary
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
-                            "event_id": event_id,
-                            "event_summary": event_summary,
-                            "calendar_id": calendar_id,
-                            "start_time": start_time,
-                            "end_time": end_time,
-                            "location": location,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(
-                            f"Successfully updated Google Calendar event {event_summary}"
-                        )
-                        continue
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    events_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'event_markdown': event_markdown,
+                        'content_hash': content_hash,
+                        'event_id': event_id,
+                        'event_summary': event_summary,
+                        'calendar_id': calendar_id,
+                        'start_time': start_time,
+                        'end_time': end_time,
+                        'location': location,
+                        'description': description,
+                    })
+                    continue
 
                 # Document doesn't exist by unique_identifier_hash
                 # Check if a document with the same content_hash exists (from another connector)
@@ -434,52 +382,9 @@ async def index_google_calendar_events(
                     )
                     duplicate_content_count += 1
                     documents_skipped += 1
-                    skipped_events.append(
-                        f"{event_summary} (already indexed by another connector)"
-                    )
                     continue
 
-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "event_id": event_id,
-                        "event_summary": event_summary,
-                        "calendar_id": calendar_id,
-                        "start_time": start_time,
-                        "end_time": end_time,
-                        "location": location or "No location",
-                        "document_type": "Google Calendar Event",
-                        "connector_type": "Google Calendar",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        event_markdown, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    summary_content = f"Google Calendar Event: {event_summary}\n\n"
-                    summary_content += f"Calendar: {calendar_id}\n"
-                    summary_content += f"Start: {start_time}\n"
-                    summary_content += f"End: {end_time}\n"
-                    if location:
-                        summary_content += f"Location: {location}\n"
-                    if description:
-                        desc_preview = description[:1000]
-                        if len(description) > 1000:
-                            desc_preview += "..."
-                        summary_content += f"Description: {desc_preview}\n"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-                chunks = await create_document_chunks(event_markdown)
-
+                # Create new document with PENDING status (visible in UI immediately)
                 document = Document(
                     search_space_id=search_space_id,
                     title=event_summary,
@@ -491,23 +396,124 @@ async def index_google_calendar_events(
                         "start_time": start_time,
                         "end_time": end_time,
                         "location": location,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                     },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                     unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                     updated_at=get_current_timestamp(),
                     created_by_id=user_id,
                     connector_id=connector_id,
                 )
-
                 session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new event {event_summary}")
+                new_documents_created = True
 
-                # Batch commit every 10 documents
+                events_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'event_markdown': event_markdown,
+                    'content_hash': content_hash,
+                    'event_id': event_id,
+                    'event_summary': event_summary,
+                    'calendar_id': calendar_id,
+                    'start_time': start_time,
+                    'end_time': end_time,
+                    'location': location,
+                    'description': description,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(events_to_process)} documents")
+
+        for item in events_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "event_id": item['event_id'],
+                        "event_summary": item['event_summary'],
+                        "calendar_id": item['calendar_id'],
+                        "start_time": item['start_time'],
+                        "end_time": item['end_time'],
+                        "location": item['location'] or "No location",
+                        "document_type": "Google Calendar Event",
+                        "connector_type": "Google Calendar",
+                    }
+                    summary_content, summary_embedding = await generate_document_summary(
+                        item['event_markdown'], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    summary_content = f"Google Calendar Event: {item['event_summary']}\n\n"
+                    summary_content += f"Calendar: {item['calendar_id']}\n"
+                    summary_content += f"Start: {item['start_time']}\n"
+                    summary_content += f"End: {item['end_time']}\n"
+                    if item['location']:
+                        summary_content += f"Location: {item['location']}\n"
+                    if item['description']:
+                        desc_preview = item['description'][:1000]
+                        if len(item['description']) > 1000:
+                            desc_preview += "..."
+                        summary_content += f"Description: {desc_preview}\n"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item['event_markdown'])
+
+                # Update document to READY with actual content
+                document.title = item['event_summary']
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "event_id": item['event_id'],
+                    "event_summary": item['event_summary'],
+                    "calendar_id": item['calendar_id'],
+                    "start_time": item['start_time'],
+                    "end_time": item['end_time'],
+                    "location": item['location'],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                 if documents_indexed % 10 == 0:
                     logger.info(
                         f"Committing batch: {documents_indexed} Google Calendar events processed so far"
@@ -515,19 +521,18 @@ async def index_google_calendar_events(
                     await session.commit()
 
             except Exception as e:
-                logger.error(
-                    f"Error processing event {event.get('summary', 'Unknown')}: {e!s}",
-                    exc_info=True,
-                )
-                skipped_events.append(
-                    f"{event.get('summary', 'Unknown')} (processing error)"
-                )
-                documents_skipped += 1
+                logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI once a later commit persists it; no commit/rollback here)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
                 continue
 
-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(
@@ -535,6 +540,9 @@ async def index_google_calendar_events(
         )
         try:
             await session.commit()
+            logger.info(
+                "Successfully committed all Google Calendar document changes to database"
+            )
         except Exception as e:
             # Handle any remaining integrity errors gracefully (race conditions, etc.)
             if (
@@ -551,10 +559,15 @@ async def index_google_calendar_events(
             else:
                 raise
 
-        # Build warning message if duplicates were found
-        warning_message = None
+        # Build warning message if there were issues
+        warning_parts = []
         if duplicate_content_count > 0:
-            warning_message = f"{duplicate_content_count} skipped (duplicate)"
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
+
+        total_processed = documents_indexed
 
         await task_logger.log_task_success(
             log_entry,
@@ -563,14 +576,15 @@ async def index_google_calendar_events(
                 "events_processed": total_processed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                 "duplicate_content_count": duplicate_content_count,
-                "skipped_events_count": len(skipped_events),
             },
         )
 
         logger.info(
-            f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
-            f"({duplicate_content_count} due to duplicate content from other connectors)"
+            f"Google Calendar indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
         )
         return total_processed, warning_message
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
index 151c1abbc..8eae35d00 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
@@ -1,4 +1,9 @@
-"""Google Drive indexer using Surfsense file processors."""
+"""Google Drive indexer using Surfsense file processors.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
+"""
 
 import logging
 import time
@@ -17,11 +22,12 @@ from app.connectors.google_drive import (
     get_files_in_folder,
     get_start_page_token,
 )
-from app.db import DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.connector_indexers.base import (
     check_document_by_unique_identifier,
     get_connector_by_id,
+    get_current_timestamp,
     update_connector_last_indexed,
 )
 from app.utils.document_converters import generate_unique_identifier_hash
@@ -324,8 +330,29 @@ async def index_google_drive_single_file(
         display_name = file_name or file.get("name", "Unknown")
         logger.info(f"Indexing Google Drive file: {display_name} ({file_id})")
 
+        # Create pending document for status visibility
+        pending_doc, should_skip = await _create_pending_document_for_file(
+            session=session,
+            file=file,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+        )
+
+        if should_skip:
+            await task_logger.log_task_progress(
+                log_entry,
+                f"File {display_name} is unchanged or not indexable",
+                {"status": "skipped"},
+            )
+            return 0, None
+
+        # Commit pending document so it appears in UI
+        if pending_doc and pending_doc.id is None:
+            await session.commit()
+
         # Process the file
-        indexed, skipped = await _process_single_file(
+        indexed, skipped, failed = await _process_single_file(
             drive_client=drive_client,
             session=session,
             file=file,
@@ -334,6 +361,7 @@ async def index_google_drive_single_file(
             user_id=user_id,
             task_logger=task_logger,
             log_entry=log_entry,
+            pending_document=pending_doc,
         )
 
         await session.commit()
@@ -341,6 +369,15 @@ async def index_google_drive_single_file(
             "Successfully committed Google Drive file indexing changes to database"
         )
 
+        if failed > 0:
+            error_msg = f"Failed to index file {display_name}"
+            await task_logger.log_task_failure(
+                log_entry,
+                error_msg,
+                {"file_name": display_name, "file_id": file_id},
+            )
+            return 0, error_msg
+
         if indexed > 0:
             await task_logger.log_task_success(
                 log_entry,
@@ -397,7 +434,12 @@ async def _index_full_scan(
     include_subfolders: bool = False,
     on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, int]:
-    """Perform full scan indexing of a folder."""
+    """Perform full scan indexing of a folder.
+    
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Collect all files and create pending documents (visible in UI immediately)
+    - Phase 2: Process each file: pending → processing → ready/failed
+    """
     await task_logger.log_task_progress(
         log_entry,
         f"Starting full scan of folder: {folder_name} (include_subfolders={include_subfolders})",
@@ -410,29 +452,31 @@ async def _index_full_scan(
 
     documents_indexed = 0
     documents_skipped = 0
+    documents_failed = 0
     files_processed = 0
 
     # Heartbeat tracking - update notification periodically to prevent appearing stuck
     last_heartbeat_time = time.time()
 
+    # =======================================================================
+    # PHASE 1: Collect all files and create pending documents
+    # This makes ALL documents visible in the UI immediately with pending status
+    # =======================================================================
+    files_to_process = []  # List of (file, pending_document or None)
+    new_documents_created = False
+
     # Queue of folders to process: (folder_id, folder_name)
     folders_to_process = [(folder_id, folder_name)]
 
+    logger.info("Phase 1: Collecting files and creating pending documents")
+
     while folders_to_process and files_processed < max_files:
-        # Check if it's time for a heartbeat update
-        if (
-            on_heartbeat_callback
-            and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-        ):
-            await on_heartbeat_callback(documents_indexed)
-            last_heartbeat_time = time.time()
         current_folder_id, current_folder_name = folders_to_process.pop(0)
-        logger.info(f"Processing folder: {current_folder_name} ({current_folder_id})")
+        logger.info(f"Scanning folder: {current_folder_name} ({current_folder_id})")
         page_token = None
 
         while files_processed < max_files:
             # Get files and folders in current folder
-            # include_subfolders=True here so we get folder items to queue them
             files, next_token, error = await get_files_in_folder(
                 drive_client,
                 current_folder_id,
@@ -462,35 +506,74 @@ async def _index_full_scan(
                         logger.debug(f"Queued subfolder: {file.get('name', 'Unknown')}")
                     continue
 
-                # Process the file
                 files_processed += 1
 
-                indexed, skipped = await _process_single_file(
-                    drive_client=drive_client,
+                # Create pending document for this file
+                pending_doc, should_skip = await _create_pending_document_for_file(
                     session=session,
                     file=file,
                     connector_id=connector_id,
                     search_space_id=search_space_id,
                     user_id=user_id,
-                    task_logger=task_logger,
-                    log_entry=log_entry,
                 )
 
-                documents_indexed += indexed
-                documents_skipped += skipped
+                if should_skip:
+                    documents_skipped += 1
+                    continue
 
-                if documents_indexed % 10 == 0 and documents_indexed > 0:
-                    await session.commit()
-                    logger.info(
-                        f"Committed batch: {documents_indexed} files indexed so far"
-                    )
+                if pending_doc and pending_doc.id is None:
+                    # New document was created
+                    new_documents_created = True
+
+                files_to_process.append((file, pending_doc))
 
             page_token = next_token
             if not page_token:
                 break
 
+    # Commit all pending documents - they all appear in UI now
+    if new_documents_created:
+        logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents")
+        await session.commit()
+
+    # =======================================================================
+    # PHASE 2: Process each file one by one
+    # Each document transitions: pending → processing → ready/failed
+    # =======================================================================
+    logger.info(f"Phase 2: Processing {len(files_to_process)} files")
+
+    for file, pending_doc in files_to_process:
+        # Check if it's time for a heartbeat update
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        indexed, skipped, failed = await _process_single_file(
+            drive_client=drive_client,
+            session=session,
+            file=file,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            task_logger=task_logger,
+            log_entry=log_entry,
+            pending_document=pending_doc,
+        )
+
+        documents_indexed += indexed
+        documents_skipped += skipped
+        documents_failed += failed
+
+        if documents_indexed % 10 == 0 and documents_indexed > 0:
+            await session.commit()
+            logger.info(
+                f"Committed batch: {documents_indexed} files indexed so far"
+            )
+
     logger.info(
-        f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped"
+        f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed"
     )
     return documents_indexed, documents_skipped
 
@@ -514,6 +597,10 @@ async def _index_with_delta_sync(
 
     Note: include_subfolders is accepted for API consistency but delta sync
     automatically tracks changes across all folders including subfolders.
+    
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Collect all changes and create pending documents (visible in UI immediately)
+    - Phase 2: Process each file: pending → processing → ready/failed
     """
     await task_logger.log_task_progress(
         log_entry,
@@ -537,19 +624,21 @@ async def _index_with_delta_sync(
 
     documents_indexed = 0
     documents_skipped = 0
+    documents_failed = 0
     files_processed = 0
 
     # Heartbeat tracking - update notification periodically to prevent appearing stuck
     last_heartbeat_time = time.time()
 
+    # =======================================================================
+    # PHASE 1: Analyze changes and create pending documents for new/modified files
+    # =======================================================================
+    changes_to_process = []  # List of (change, file, pending_document or None)
+    new_documents_created = False
+
+    logger.info("Phase 1: Analyzing changes and creating pending documents")
+
     for change in changes:
-        # Check if it's time for a heartbeat update
-        if (
-            on_heartbeat_callback
-            and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-        ):
-            await on_heartbeat_callback(documents_indexed)
-            last_heartbeat_time = time.time()
         if files_processed >= max_files:
             break
 
@@ -566,7 +655,45 @@ async def _index_with_delta_sync(
         if not file:
             continue
 
-        indexed, skipped = await _process_single_file(
+        # Create pending document for this file
+        pending_doc, should_skip = await _create_pending_document_for_file(
+            session=session,
+            file=file,
+            connector_id=connector_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+        )
+
+        if should_skip:
+            documents_skipped += 1
+            continue
+
+        if pending_doc and pending_doc.id is None:
+            # New document was created
+            new_documents_created = True
+
+        changes_to_process.append((change, file, pending_doc))
+
+    # Commit all pending documents - they all appear in UI now
+    if new_documents_created:
+        logger.info(f"Phase 1: Committing pending documents")
+        await session.commit()
+
+    # =======================================================================
+    # PHASE 2: Process each file one by one
+    # Each document transitions: pending → processing → ready/failed
+    # =======================================================================
+    logger.info(f"Phase 2: Processing {len(changes_to_process)} changes")
+
+    for change, file, pending_doc in changes_to_process:
+        # Check if it's time for a heartbeat update
+        if on_heartbeat_callback:
+            current_time = time.time()
+            if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                await on_heartbeat_callback(documents_indexed)
+                last_heartbeat_time = current_time
+
+        indexed, skipped, failed = await _process_single_file(
             drive_client=drive_client,
             session=session,
             file=file,
@@ -575,21 +702,123 @@ async def _index_with_delta_sync(
             user_id=user_id,
             task_logger=task_logger,
             log_entry=log_entry,
+            pending_document=pending_doc,
         )
 
         documents_indexed += indexed
         documents_skipped += skipped
+        documents_failed += failed
 
         if documents_indexed % 10 == 0 and documents_indexed > 0:
             await session.commit()
             logger.info(f"Committed batch: {documents_indexed} changes processed")
 
     logger.info(
-        f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped"
+        f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed"
     )
     return documents_indexed, documents_skipped
 
 
+async def _create_pending_document_for_file(
+    session: AsyncSession,
+    file: dict,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+) -> tuple[Document | None, bool]:
+    """
+    Look up the document for a Google Drive file, or add a new 'pending' one to the session.
+    
+    This is Phase 1 of the 2-phase document status update pattern.
+    Nothing is committed here: the caller commits so pending documents show up in the UI.
+    
+    Args:
+        session: Database session (the new document is only added to it, not flushed)
+        file: File metadata from Google Drive API (reads id, name, mimeType, md5Checksum, modifiedTime)
+        connector_id: ID of the Drive connector
+        search_space_id: ID of the search space
+        user_id: ID of the user, stored as created_by_id on a new document
+        
+    Returns:
+        Tuple of (document, should_skip):
+        - (existing_doc, False): Existing document whose content changed or could not be compared
+        - (new_pending_doc, False): New pending document added to the session (its id is not set yet)
+        - (None, True): Skip - rejected by should_skip_file, missing file ID, or content unchanged
+    """
+    from app.connectors.google_drive.file_types import should_skip_file
+    
+    file_id = file.get("id")
+    file_name = file.get("name", "Unknown")
+    mime_type = file.get("mimeType", "")
+    
+    # Skip MIME types rejected by should_skip_file (presumably folders and shortcuts)
+    if should_skip_file(mime_type):
+        return None, True
+    
+    if not file_id:  # Without a Drive file ID no unique identifier hash can be built
+        return None, True
+    
+    # Generate unique identifier hash for this file
+    unique_identifier_hash = generate_unique_identifier_hash(
+        DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
+    )
+    
+    # Check if document exists
+    existing_document = await check_document_by_unique_identifier(
+        session, unique_identifier_hash
+    )
+    
+    if existing_document:
+        # Compare checksums/timestamps. NOTE(review): a rename-only change is skipped below too - confirm renames are handled elsewhere
+        incoming_md5 = file.get("md5Checksum")
+        incoming_modified_time = file.get("modifiedTime")
+        doc_metadata = existing_document.document_metadata or {}
+        stored_md5 = doc_metadata.get("md5_checksum")
+        stored_modified_time = doc_metadata.get("modified_time")
+        
+        # Determine if content changed; stays False (treated as changed) when nothing is comparable
+        content_unchanged = False
+        if incoming_md5 and stored_md5:
+            content_unchanged = incoming_md5 == stored_md5
+        elif not incoming_md5 and incoming_modified_time and stored_modified_time:
+            # No md5 on the incoming file (presumably a Google Workspace file) - use modifiedTime as fallback
+            content_unchanged = incoming_modified_time == stored_modified_time
+        
+        if content_unchanged:
+            # Ensure status is ready (might have been stuck in processing/pending); persisted by the caller's next commit
+            if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                existing_document.status = DocumentStatus.ready()
+            return None, True
+        
+        # Content changed (or could not be compared) - return existing document for update
+        return existing_document, False
+    
+    # Create new pending document
+    document = Document(
+        search_space_id=search_space_id,
+        title=file_name,
+        document_type=DocumentType.GOOGLE_DRIVE_FILE,
+        document_metadata={
+            "google_drive_file_id": file_id,
+            "google_drive_file_name": file_name,
+            "google_drive_mime_type": mime_type,
+            "connector_id": connector_id,
+        },
+        content="Pending...",  # Placeholder until processed
+        content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+        unique_identifier_hash=unique_identifier_hash,
+        embedding=None,
+        chunks=[],  # Empty at creation
+        status=DocumentStatus.pending(),  # Pending until processing starts
+        updated_at=get_current_timestamp(),
+        created_by_id=user_id,
+        connector_id=connector_id,
+    )
+    session.add(document)  # Added only; callers use `id is None` to detect a new, uncommitted document
+    
+    return document, False
+
+
 async def _check_rename_only_update(
     session: AsyncSession,
     file: dict,
@@ -725,15 +954,31 @@ async def _process_single_file(
     user_id: str,
     task_logger: TaskLoggingService,
     log_entry: any,
-) -> tuple[int, int]:
+    pending_document: Document | None = None,
+) -> tuple[int, int, int]:
     """
     Process a single file by downloading and using Surfsense's file processor.
+    
+    Implements Phase 2 of the 2-phase document status update pattern.
+    Updates document status: pending → processing → ready/failed
+
+    Args:
+        drive_client: Google Drive client
+        session: Database session
+        file: File metadata from Google Drive API
+        connector_id: ID of the connector
+        search_space_id: ID of the search space
+        user_id: ID of the user
+        task_logger: Task logging service
+        log_entry: Log entry for tracking
+        pending_document: Optional pending document created in Phase 1
 
     Returns:
-        Tuple of (indexed_count, skipped_count)
+        Tuple of (indexed_count, skipped_count, failed_count)
     """
     file_name = file.get("name", "Unknown")
     mime_type = file.get("mimeType", "")
+    file_id = file.get("id")
 
     try:
         logger.info(f"Processing file: {file_name} ({mime_type})")
@@ -756,10 +1001,15 @@ async def _process_single_file(
             # Return 1 for renamed files (they are "indexed" in the sense that they're updated)
             # Return 0 for unchanged files
             if "renamed" in (rename_message or "").lower():
-                return 1, 0
-            return 0, 1
+                return 1, 0, 0
+            return 0, 1, 0
 
-        _, error, _ = await download_and_process_file(
+        # Set document to PROCESSING status if we have a pending document
+        if pending_document:
+            pending_document.status = DocumentStatus.processing()
+            await session.commit()
+
+        _, error, metadata = await download_and_process_file(
             client=drive_client,
             file=file,
             search_space_id=search_space_id,
@@ -776,14 +1026,43 @@ async def _process_single_file(
                 f"Skipped {file_name}: {error}",
                 {"status": "skipped", "reason": error},
             )
-            return 0, 1
+            # Mark pending document as failed if it exists
+            if pending_document:
+                pending_document.status = DocumentStatus.failed(error)
+                pending_document.updated_at = get_current_timestamp()
+                await session.commit()
+            return 0, 1, 0
+
+        # The document was created/updated by download_and_process_file
+        # Find the document and ensure it has READY status
+        if file_id:
+            unique_identifier_hash = generate_unique_identifier_hash(
+                DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
+            )
+            processed_doc = await check_document_by_unique_identifier(
+                session, unique_identifier_hash
+            )
+            if processed_doc:
+                # Ensure status is READY
+                if not DocumentStatus.is_state(processed_doc.status, DocumentStatus.READY):
+                    processed_doc.status = DocumentStatus.ready()
+                    processed_doc.updated_at = get_current_timestamp()
+                    await session.commit()
 
         logger.info(f"Successfully indexed Google Drive file: {file_name}")
-        return 1, 0
+        return 1, 0, 0
 
     except Exception as e:
         logger.error(f"Error processing file {file_name}: {e!s}", exc_info=True)
-        return 0, 1
+        # Mark pending document as failed if it exists
+        if pending_document:
+            try:
+                pending_document.status = DocumentStatus.failed(str(e))
+                pending_document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception as status_error:
+                logger.error(f"Failed to update document status to failed: {status_error}")
+        return 0, 0, 1
 
 
 async def _remove_document(session: AsyncSession, file_id: str, search_space_id: int):
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
index 805be5781..89e8796d3 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@@ -1,5 +1,9 @@
 """
 Google Gmail connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import time
@@ -13,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.connectors.google_gmail_connector import GoogleGmailConnector
 from app.db import (
     Document,
+    DocumentStatus,
     DocumentType,
     SearchSourceConnectorType,
 )
@@ -32,6 +37,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -220,20 +226,21 @@ async def index_google_gmail_messages(
         logger.info(f"Found {len(messages)} Google gmail messages to index")
 
         documents_indexed = 0
-        skipped_messages = []
         documents_skipped = 0
+        documents_failed = 0  # Track messages that failed processing
+        duplicate_content_count = 0  # Track messages skipped due to duplicate content_hash
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
+        # =======================================================================
+        # PHASE 1: Analyze all messages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        messages_to_process = []  # List of dicts with document and message data
+        new_documents_created = False
+
         for message in messages:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
             try:
                 # Extract message information
                 message_id = message.get("id", "")
@@ -259,7 +266,6 @@ async def index_google_gmail_messages(
 
                 if not message_id:
                     logger.warning(f"Skipping message with missing ID: {subject}")
-                    skipped_messages.append(f"{subject} (missing ID)")
                     documents_skipped += 1
                     continue
 
@@ -268,7 +274,6 @@ async def index_google_gmail_messages(
 
                 if not markdown_content.strip():
                     logger.warning(f"Skipping message with no content: {subject}")
-                    skipped_messages.append(f"{subject} (no content)")
                     documents_skipped += 1
                     continue
 
@@ -288,68 +293,25 @@ async def index_google_gmail_messages(
                 if existing_document:
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Gmail message {subject} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Gmail message {subject}. Updating document."
-                        )
 
-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "message_id": message_id,
-                                "thread_id": thread_id,
-                                "subject": subject,
-                                "sender": sender,
-                                "date": date_str,
-                                "document_type": "Gmail Message",
-                                "connector_type": "Google Gmail",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                markdown_content, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = f"Google Gmail Message: {subject}\n\n"
-                            summary_content += f"Sender: {sender}\n"
-                            summary_content += f"Date: {date_str}\n"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(markdown_content)
-
-                        # Update existing document
-                        existing_document.title = subject
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
-                            "message_id": message_id,
-                            "thread_id": thread_id,
-                            "subject": subject,
-                            "sender": sender,
-                            "date": date_str,
-                            "connector_id": connector_id,
-                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(f"Successfully updated Gmail message {subject}")
-                        continue
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    messages_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'markdown_content': markdown_content,
+                        'content_hash': content_hash,
+                        'message_id': message_id,
+                        'thread_id': thread_id,
+                        'subject': subject,
+                        'sender': sender,
+                        'date_str': date_str,
+                    })
+                    continue
 
                 # Document doesn't exist by unique_identifier_hash
                 # Check if a document with the same content_hash exists (from another connector)
@@ -364,45 +326,11 @@ async def index_google_gmail_messages(
                         f"(existing document ID: {duplicate_by_content.id}, "
                         f"type: {duplicate_by_content.document_type}). Skipping."
                     )
+                    duplicate_content_count += 1
                     documents_skipped += 1
                     continue
 
-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "message_id": message_id,
-                        "thread_id": thread_id,
-                        "subject": subject,
-                        "sender": sender,
-                        "date": date_str,
-                        "document_type": "Gmail Message",
-                        "connector_type": "Google Gmail",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        markdown_content, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    summary_content = f"Google Gmail Message: {subject}\n\n"
-                    summary_content += f"Sender: {sender}\n"
-                    summary_content += f"Date: {date_str}\n"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                # Process chunks
-                chunks = await create_document_chunks(markdown_content)
-
-                # Create and store new document
-                logger.info(f"Creating new document for Gmail message: {subject}")
+                # Create new document with PENDING status (visible in UI immediately)
                 document = Document(
                     search_space_id=search_space_id,
                     title=subject,
@@ -413,21 +341,111 @@ async def index_google_gmail_messages(
                         "subject": subject,
                         "sender": sender,
                         "date": date_str,
+                        "connector_id": connector_id,
                     },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                     unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                     updated_at=get_current_timestamp(),
                     created_by_id=user_id,
                     connector_id=connector_id,
                 )
                 session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new email {summary_content}")
+                new_documents_created = True
 
-                # Batch commit every 10 documents
+                messages_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'markdown_content': markdown_content,
+                    'content_hash': content_hash,
+                    'message_id': message_id,
+                    'thread_id': thread_id,
+                    'subject': subject,
+                    'sender': sender,
+                    'date_str': date_str,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+
+        for item in messages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "message_id": item['message_id'],
+                        "thread_id": item['thread_id'],
+                        "subject": item['subject'],
+                        "sender": item['sender'],
+                        "date": item['date_str'],
+                        "document_type": "Gmail Message",
+                        "connector_type": "Google Gmail",
+                    }
+                    summary_content, summary_embedding = await generate_document_summary(
+                        item['markdown_content'], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    summary_content = f"Google Gmail Message: {item['subject']}\n\n"
+                    summary_content += f"Sender: {item['sender']}\n"
+                    summary_content += f"Date: {item['date_str']}\n"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item['markdown_content'])
+
+                # Update document to READY with actual content
+                document.title = item['subject']
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "message_id": item['message_id'],
+                    "thread_id": item['thread_id'],
+                    "subject": item['subject'],
+                    "sender": item['sender'],
+                    "date": item['date_str'],
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                 if documents_indexed % 10 == 0:
                     logger.info(
                         f"Committing batch: {documents_indexed} Gmail messages processed so far"
@@ -435,45 +453,74 @@ async def index_google_gmail_messages(
                     await session.commit()
 
             except Exception as e:
-                logger.error(
-                    f"Error processing the email {message_id}: {e!s}",
-                    exc_info=True,
-                )
-                skipped_messages.append(f"{subject} (processing error)")
-                documents_skipped += 1
-                continue  # Skip this message and continue with others
+                logger.error(f"Error processing Gmail message: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
+                continue
 
-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed")
-        await session.commit()
-        logger.info(
-            "Successfully committed all Google gmail document changes to database"
-        )
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Google Gmail document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same message was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
+
+        total_processed = documents_indexed
 
         # Log success
         await task_logger.log_task_success(
             log_entry,
-            f"Successfully completed Google gmail indexing for connector {connector_id}",
+            f"Successfully completed Google Gmail indexing for connector {connector_id}",
             {
                 "events_processed": total_processed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
-                "skipped_messages_count": len(skipped_messages),
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
             },
         )
 
         logger.info(
-            f"Google gmail indexing completed: {documents_indexed} new emails, {documents_skipped} skipped"
+            f"Google Gmail indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
         )
         return (
             total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+            warning_message,
+        )  # warning_message is None when no messages were duplicates or failed
 
     except SQLAlchemyError as db_error:
         await session.rollback()
diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py
index 674773463..4433cb11e 100644
--- a/surfsense_backend/app/tasks/document_processors/file_processors.py
+++ b/surfsense_backend/app/tasks/document_processors/file_processors.py
@@ -17,7 +17,7 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config as app_config
-from app.db import Document, DocumentType, Log, Notification
+from app.db import Document, DocumentStatus, DocumentType, Log, Notification
 from app.services.llm_service import get_user_long_context_llm
 from app.services.notification_service import NotificationService
 from app.services.task_logging_service import TaskLoggingService
@@ -499,6 +499,7 @@ async def add_received_file_document_using_unstructured(
             existing_document.blocknote_document = blocknote_json
             existing_document.content_needs_reindexing = False
             existing_document.updated_at = get_current_timestamp()
+            existing_document.status = DocumentStatus.ready()  # Mark as ready
 
             await session.commit()
             await session.refresh(existing_document)
@@ -528,6 +529,7 @@ async def add_received_file_document_using_unstructured(
                 updated_at=get_current_timestamp(),
                 created_by_id=user_id,
                 connector_id=connector.get("connector_id") if connector else None,
+                status=DocumentStatus.ready(),  # Mark as ready
             )
 
             session.add(document)
@@ -640,6 +642,7 @@ async def add_received_file_document_using_llamacloud(
             existing_document.blocknote_document = blocknote_json
             existing_document.content_needs_reindexing = False
             existing_document.updated_at = get_current_timestamp()
+            existing_document.status = DocumentStatus.ready()  # Mark as ready
 
             await session.commit()
             await session.refresh(existing_document)
@@ -669,6 +672,7 @@ async def add_received_file_document_using_llamacloud(
                 updated_at=get_current_timestamp(),
                 created_by_id=user_id,
                 connector_id=connector.get("connector_id") if connector else None,
+                status=DocumentStatus.ready(),  # Mark as ready
             )
 
             session.add(document)
@@ -806,6 +810,7 @@ async def add_received_file_document_using_docling(
             existing_document.blocknote_document = blocknote_json
             existing_document.content_needs_reindexing = False
             existing_document.updated_at = get_current_timestamp()
+            existing_document.status = DocumentStatus.ready()  # Mark as ready
 
             await session.commit()
             await session.refresh(existing_document)
@@ -835,6 +840,7 @@ async def add_received_file_document_using_docling(
                 updated_at=get_current_timestamp(),
                 created_by_id=user_id,
                 connector_id=connector.get("connector_id") if connector else None,
+                status=DocumentStatus.ready(),  # Mark as ready
             )
 
             session.add(document)
diff --git a/surfsense_backend/app/tasks/document_processors/markdown_processor.py b/surfsense_backend/app/tasks/document_processors/markdown_processor.py
index ff85d962e..8ecbb1370 100644
--- a/surfsense_backend/app/tasks/document_processors/markdown_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/markdown_processor.py
@@ -7,7 +7,7 @@ import logging
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 
-from app.db import Document, DocumentType
+from app.db import Document, DocumentStatus, DocumentType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -270,6 +270,7 @@ async def add_received_markdown_file_document(
             existing_document.chunks = chunks
             existing_document.blocknote_document = blocknote_json
             existing_document.updated_at = get_current_timestamp()
+            existing_document.status = DocumentStatus.ready()  # Mark as ready
 
             await session.commit()
             await session.refresh(existing_document)
@@ -297,6 +298,7 @@ async def add_received_markdown_file_document(
                 updated_at=get_current_timestamp(),
                 created_by_id=user_id,
                 connector_id=connector.get("connector_id") if connector else None,
+                status=DocumentStatus.ready(),  # Mark as ready
             )
 
             session.add(document)

From 781cdc3dbd94c27bf80f5992055389490abdb919 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 02:24:51 +0530
Subject: [PATCH 20/36] refactor: remove manual refresh functionality and
 update UI components for improved document management experience

---
 .../(manage)/components/DocumentsFilters.tsx  |  2 +-
 .../components/DocumentsTableShell.tsx        | 33 ++++++++++++-------
 .../documents/(manage)/page.tsx               | 17 ----------
 .../contracts/enums/connectorIcons.tsx        |  2 +-
 surfsense_web/messages/en.json                |  1 -
 surfsense_web/messages/zh.json                |  1 -
 6 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
index ed882916e..028f38098 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
@@ -226,7 +226,7 @@ export function DocumentsFilters({
 								)}
 							</div>
 							{activeTypes.length > 0 && (
-								<div className="px-3 pt-1.5 border-t border-border/50">
+								<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50">
 									<Button
 										variant="ghost"
 										size="sm"
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index d579fe677..beb808191 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -45,7 +45,7 @@ function StatusIndicator({ status }: { status?: DocumentStatus }) {
 							<Clock className="h-5 w-5 text-muted-foreground" />
 						</div>
 					</TooltipTrigger>
-					<TooltipContent side="top">Pending - waiting to be processed</TooltipContent>
+					<TooltipContent side="top">Pending - waiting to be synced</TooltipContent>
 				</Tooltip>
 			);
 		case "processing":
@@ -191,7 +191,6 @@ export function DocumentsTableShell({
 	documents,
 	loading,
 	error,
-	onRefresh,
 	selectedIds,
 	setSelectedIds,
 	columnVisibility,
@@ -204,7 +203,6 @@ export function DocumentsTableShell({
 	documents: Document[];
 	loading: boolean;
 	error: boolean;
-	onRefresh: () => Promise<void>;
 	selectedIds: Set<number>;
 	setSelectedIds: (update: Set<number>) => void;
 	columnVisibility: ColumnVisibility;
@@ -361,10 +359,15 @@ export function DocumentsTableShell({
 										</TableHead>
 									)}
 									{columnVisibility.created_at && (
-										<TableHead className="w-32">
+										<TableHead className="w-32 border-r border-border/40">
 											<Skeleton className="h-3 w-16" />
 										</TableHead>
 									)}
+									{columnVisibility.status && (
+										<TableHead className="w-20 text-center">
+											<Skeleton className="h-3 w-12 mx-auto" />
+										</TableHead>
+									)}
 									<TableHead className="w-10">
 										<span className="sr-only">Actions</span>
 									</TableHead>
@@ -401,10 +404,15 @@ export function DocumentsTableShell({
 												</TableCell>
 											)}
 											{columnVisibility.created_at && (
-												<TableCell className="w-32 py-2.5">
+												<TableCell className="w-32 py-2.5 border-r border-border/40">
 													<Skeleton className="h-4 w-20" />
 												</TableCell>
 											)}
+											{columnVisibility.status && (
+												<TableCell className="w-20 py-2.5 text-center">
+													<Skeleton className="h-5 w-5 mx-auto rounded-full" />
+												</TableCell>
+											)}
 											<TableCell className="w-10 py-2.5 text-center">
 												<Skeleton className="h-6 w-6 mx-auto rounded" />
 											</TableCell>
@@ -435,23 +443,26 @@ export function DocumentsTableShell({
 											)}
 										</div>
 									</div>
-									<Skeleton className="h-7 w-7 rounded" />
+									<div className="flex items-center gap-2">
+										{columnVisibility.status && (
+											<Skeleton className="h-5 w-5 rounded-full" />
+										)}
+										<Skeleton className="h-7 w-7 rounded" />
+									</div>
 								</div>
 							</div>
 						))}
 					</div>
 				</>
 			) : error ? (
-				<div className="flex h-[400px] w-full items-center justify-center">
+				<div className="flex h-[50vh] w-full items-center justify-center">
 					<div className="flex flex-col items-center gap-3">
+						<AlertCircle className="h-8 w-8 text-destructive/60" />
 						<p className="text-sm text-destructive">{t("error_loading")}</p>
-						<Button variant="outline" size="sm" onClick={() => onRefresh()}>
-							{t("retry")}
-						</Button>
 					</div>
 				</div>
 			) : sorted.length === 0 ? (
-				<div className="flex h-[400px] w-full items-center justify-center">
+				<div className="flex h-[50vh] w-full items-center justify-center">
 					<motion.div
 						initial={{ opacity: 0, y: 20 }}
 						animate={{ opacity: 1, y: 0 }}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
index 2c515ff77..b85b334d7 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@@ -140,22 +140,6 @@ export default function DocumentsTable() {
 		setPageIndex(0);
 	};
 
-	const [isRefreshing, setIsRefreshing] = useState(false);
-
-	const refreshCurrentView = useCallback(async () => {
-		if (isRefreshing) return;
-		setIsRefreshing(true);
-		try {
-			if (isSearchMode) {
-				await refetchSearch();
-			}
-			// Real-time view doesn't need manual refresh - Electric handles it
-			toast.success(t("refresh_success") || "Documents refreshed");
-		} finally {
-			setIsRefreshing(false);
-		}
-	}, [isSearchMode, refetchSearch, t, isRefreshing]);
-
 	const onBulkDelete = async () => {
 		if (selectedIds.size === 0) {
 			toast.error(t("no_rows_selected"));
@@ -293,7 +277,6 @@ export default function DocumentsTable() {
 				documents={displayDocs}
 				loading={!!loading}
 				error={!!error}
-				onRefresh={refreshCurrentView}
 				selectedIds={selectedIds}
 				setSelectedIds={setSelectedIds}
 				columnVisibility={columnVisibility}
diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx
index aaf476215..18a872d94 100644
--- a/surfsense_web/contracts/enums/connectorIcons.tsx
+++ b/surfsense_web/contracts/enums/connectorIcons.tsx
@@ -92,7 +92,7 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
 		case "FILE":
 			return <File {...iconProps} />;
 		case "GOOGLE_DRIVE_FILE":
-			return <File {...iconProps} />;
+			return <Image src="/connectors/google-drive.svg" alt="Google Drive" {...imgProps} />;
 		case "COMPOSIO_GOOGLE_DRIVE_CONNECTOR":
 			return <Image src="/connectors/google-drive.svg" alt="Google Drive" {...imgProps} />;
 		case "COMPOSIO_GMAIL_CONNECTOR":
diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json
index 75b186420..0dcf44776 100644
--- a/surfsense_web/messages/en.json
+++ b/surfsense_web/messages/en.json
@@ -328,7 +328,6 @@
 		"filter_placeholder": "Filter by title...",
 		"rows_per_page": "Rows per page",
 		"refresh": "Refresh",
-		"refresh_success": "Documents refreshed",
 		"upload_documents": "Upload Documents",
 		"create_shared_note": "Create Shared Note",
 		"processing_documents": "Processing documents...",
diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json
index 81121ef3e..bf5961fa7 100644
--- a/surfsense_web/messages/zh.json
+++ b/surfsense_web/messages/zh.json
@@ -313,7 +313,6 @@
 		"filter_placeholder": "按标题筛选...",
 		"rows_per_page": "每页行数",
 		"refresh": "刷新",
-		"refresh_success": "文档已刷新",
 		"upload_documents": "上传文档",
 		"create_shared_note": "创建共享笔记",
 		"processing_documents": "正在处理文档...",

From 2077344934600197ae21db895fe3b37253d06934 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 02:59:21 +0530
Subject: [PATCH 21/36] feat: implement two-phase document indexing for Linear
 and Slack connectors with real-time status updates

---
 .../connector_indexers/linear_indexer.py      | 318 ++++++++++--------
 .../tasks/connector_indexers/slack_indexer.py | 249 +++++++++-----
 2 files changed, 337 insertions(+), 230 deletions(-)

diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
index c28f151ca..45e1e357a 100644
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@@ -1,5 +1,9 @@
 """
 Linear connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import time
@@ -11,7 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.linear_connector import LinearConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -28,6 +32,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -196,6 +201,7 @@ async def index_linear_issues(
         # Track the number of documents indexed
         documents_indexed = 0
         documents_skipped = 0
+        documents_failed = 0  # Track issues that failed processing
         skipped_issues = []
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
@@ -207,16 +213,14 @@ async def index_linear_issues(
             {"stage": "process_issues", "total_issues": len(issues)},
         )
 
-        # Process each issue
-        for issue in issues:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
+        # =======================================================================
+        # PHASE 1: Analyze all issues, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        issues_to_process = []  # List of dicts with document and issue data
+        new_documents_created = False
 
+        for issue in issues:
             try:
                 issue_id = issue.get("id", "")
                 issue_identifier = issue.get("identifier", "")
@@ -262,78 +266,35 @@ async def index_linear_issues(
                 state = formatted_issue.get("state", "Unknown")
                 description = formatted_issue.get("description", "")
                 comment_count = len(formatted_issue.get("comments", []))
+                priority = formatted_issue.get("priority", "Unknown")
 
                 if existing_document:
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
                         logger.info(
                             f"Document for Linear issue {issue_identifier} unchanged. Skipping."
                         )
                         documents_skipped += 1
                         continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Linear issue {issue_identifier}. Updating document."
-                        )
 
-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "issue_id": issue_identifier,
-                                "issue_title": issue_title,
-                                "state": state,
-                                "priority": formatted_issue.get("priority", "Unknown"),
-                                "comment_count": comment_count,
-                                "document_type": "Linear Issue",
-                                "connector_type": "Linear",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                issue_content, user_llm, document_metadata
-                            )
-                        else:
-                            # Fallback to simple summary if no LLM configured
-                            if description and len(description) > 1000:
-                                description = description[:997] + "..."
-                            summary_content = f"Linear Issue {issue_identifier}: {issue_title}\n\nStatus: {state}\n\n"
-                            if description:
-                                summary_content += f"Description: {description}\n\n"
-                            summary_content += f"Comments: {comment_count}"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(issue_content)
-
-                        # Update existing document
-                        existing_document.title = f"{issue_identifier}: {issue_title}"
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
-                            "issue_id": issue_id,
-                            "issue_identifier": issue_identifier,
-                            "issue_title": issue_title,
-                            "state": state,
-                            "comment_count": comment_count,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(
-                            f"Successfully updated Linear issue {issue_identifier}"
-                        )
-                        continue
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    issues_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'issue_content': issue_content,
+                        'content_hash': content_hash,
+                        'issue_id': issue_id,
+                        'issue_identifier': issue_identifier,
+                        'issue_title': issue_title,
+                        'state': state,
+                        'description': description,
+                        'comment_count': comment_count,
+                        'priority': priority,
+                    })
+                    continue
 
                 # Document doesn't exist by unique_identifier_hash
                 # Check if a document with the same content_hash exists (from another connector)
@@ -351,48 +312,7 @@ async def index_linear_issues(
                     documents_skipped += 1
                     continue
 
-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "issue_id": issue_identifier,
-                        "issue_title": issue_title,
-                        "state": state,
-                        "priority": formatted_issue.get("priority", "Unknown"),
-                        "comment_count": comment_count,
-                        "document_type": "Linear Issue",
-                        "connector_type": "Linear",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        issue_content, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    # Truncate description if it's too long for the summary
-                    if description and len(description) > 1000:
-                        description = description[:997] + "..."
-                    summary_content = f"Linear Issue {issue_identifier}: {issue_title}\n\nStatus: {state}\n\n"
-                    if description:
-                        summary_content += f"Description: {description}\n\n"
-                    summary_content += f"Comments: {comment_count}"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                # Process chunks - using the full issue content with comments
-                chunks = await create_document_chunks(issue_content)
-
-                # Create and store new document
-                logger.info(
-                    f"Creating new document for issue {issue_identifier} - {issue_title}"
-                )
+                # Create new document with PENDING status (visible in UI immediately)
                 document = Document(
                     search_space_id=search_space_id,
                     title=f"{issue_identifier}: {issue_title}",
@@ -403,25 +323,119 @@ async def index_linear_issues(
                         "issue_title": issue_title,
                         "state": state,
                         "comment_count": comment_count,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                     },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                     unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                     updated_at=get_current_timestamp(),
                     created_by_id=user_id,
                     connector_id=connector_id,
                 )
-
                 session.add(document)
-                documents_indexed += 1
-                logger.info(
-                    f"Successfully indexed new issue {issue_identifier} - {issue_title}"
+                new_documents_created = True
+
+                issues_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'issue_content': issue_content,
+                    'content_hash': content_hash,
+                    'issue_id': issue_id,
+                    'issue_identifier': issue_identifier,
+                    'issue_title': issue_title,
+                    'state': state,
+                    'description': description,
+                    'comment_count': comment_count,
+                    'priority': priority,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(issues_to_process)} documents")
+
+        for item in issues_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
                 )
 
-                # Batch commit every 10 documents
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "issue_id": item['issue_identifier'],
+                        "issue_title": item['issue_title'],
+                        "state": item['state'],
+                        "priority": item['priority'],
+                        "comment_count": item['comment_count'],
+                        "document_type": "Linear Issue",
+                        "connector_type": "Linear",
+                    }
+                    summary_content, summary_embedding = await generate_document_summary(
+                        item['issue_content'], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    description = item['description']
+                    if description and len(description) > 1000:
+                        description = description[:997] + "..."
+                    summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n"
+                    if description:
+                        summary_content += f"Description: {description}\n\n"
+                    summary_content += f"Comments: {item['comment_count']}"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item['issue_content'])
+
+                # Update document to READY with actual content
+                document.title = f"{item['issue_identifier']}: {item['issue_title']}"
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "issue_id": item['issue_id'],
+                    "issue_identifier": item['issue_identifier'],
+                    "issue_title": item['issue_title'],
+                    "state": item['state'],
+                    "comment_count": item['comment_count'],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                 if documents_indexed % 10 == 0:
                     logger.info(
                         f"Committing batch: {documents_indexed} Linear issues processed so far"
@@ -430,44 +444,68 @@ async def index_linear_issues(
 
             except Exception as e:
                 logger.error(
-                    f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}",
+                    f"Error processing issue {item.get('issue_identifier', 'Unknown')}: {e!s}",
                     exc_info=True,
                 )
+                # Mark document as failed with reason (not committed here; it reaches the DB/UI on the next successful commit)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
                 skipped_issues.append(
-                    f"{issue.get('identifier', 'Unknown')} (processing error)"
+                    f"{item.get('issue_identifier', 'Unknown')} (processing error)"
                 )
-                documents_skipped += 1
-                continue  # Skip this issue and continue with others
+                documents_failed += 1
+                continue
 
-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if update_last_indexed:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always call the timestamp helper (even if 0 documents indexed) so Electric SQL syncs; update_last_indexed is still forwarded to it
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(f"Final commit: Total {documents_indexed} Linear issues processed")
-        await session.commit()
-        logger.info("Successfully committed all Linear document changes to database")
+        try:
+            await session.commit()
+            logger.info("Successfully committed all Linear document changes to database")
+        except Exception as e:
+            # Tolerate only unique-constraint violations (race conditions, etc.): roll back the uncommitted changes and continue; anything else re-raises
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same issue was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         # Log success
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed Linear indexing for connector {connector_id}",
             {
-                "issues_processed": total_processed,
+                "issues_processed": documents_indexed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                 "skipped_issues_count": len(skipped_issues),
             },
         )
 
         logger.info(
-            f"Linear indexing completed: {documents_indexed} new issues, {documents_skipped} skipped"
+            f"Linear indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed"
         )
-        return (
-            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+        return documents_indexed, warning_message
 
     except SQLAlchemyError as db_error:
         await session.rollback()
diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
index 010d1eff4..61faa39b3 100644
--- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
@@ -1,5 +1,9 @@
 """
 Slack connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import time
@@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.slack_history import SlackHistory
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
     create_document_chunks,
@@ -28,6 +32,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -168,11 +173,15 @@ async def index_slack_messages(
                 f"No Slack channels found for connector {connector_id}",
                 {"channels_found": 0},
             )
-            return 0, "No Slack channels found"
+            # CRITICAL: Update timestamp even when no channels found so Electric SQL syncs
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
+            return 0, None  # Return None (not error) when no channels found
 
         # Track the number of documents indexed
         documents_indexed = 0
         documents_skipped = 0
+        documents_failed = 0  # Track messages that failed processing
         skipped_channels = []
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
@@ -184,15 +193,14 @@ async def index_slack_messages(
             {"stage": "process_channels", "total_channels": len(channels)},
         )
 
-        # Process each channel
+        # =======================================================================
+        # PHASE 1: Collect all messages from all channels, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        messages_to_process = []  # List of dicts with document and message data
+        new_documents_created = False
+
         for channel_obj in channels:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
             channel_id = channel_obj["id"]
             channel_name = channel_obj["name"]
             is_private = channel_obj["is_private"]
@@ -305,47 +313,29 @@ async def index_slack_messages(
                     if existing_document:
                         # Document exists - check if content has changed
                         if existing_document.content_hash == content_hash:
+                            # Ensure status is ready (might have been stuck in processing/pending)
+                            if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                                existing_document.status = DocumentStatus.ready()
                             logger.info(
                                 f"Document for Slack message {msg_ts} in channel {channel_name} unchanged. Skipping."
                             )
                             documents_skipped += 1
                             continue
-                        else:
-                            # Content has changed - update the existing document
-                            logger.info(
-                                f"Content changed for Slack message {msg_ts} in channel {channel_name}. Updating document."
-                            )
 
-                            # Update chunks and embedding
-                            chunks = await create_document_chunks(
-                                combined_document_string
-                            )
-                            doc_embedding = config.embedding_model_instance.embed(
-                                combined_document_string
-                            )
-
-                            # Update existing document
-                            existing_document.content = combined_document_string
-                            existing_document.content_hash = content_hash
-                            existing_document.embedding = doc_embedding
-                            existing_document.document_metadata = {
-                                "channel_name": channel_name,
-                                "channel_id": channel_id,
-                                "start_date": start_date_str,
-                                "end_date": end_date_str,
-                                "message_count": len(formatted_messages),
-                                "indexed_at": datetime.now().strftime(
-                                    "%Y-%m-%d %H:%M:%S"
-                                ),
-                            }
-
-                            # Delete old chunks and add new ones
-                            existing_document.chunks = chunks
-                            existing_document.updated_at = get_current_timestamp()
-
-                            documents_indexed += 1
-                            logger.info(f"Successfully updated Slack message {msg_ts}")
-                            continue
+                        # Queue existing document for update (will be set to processing in Phase 2)
+                        messages_to_process.append({
+                            'document': existing_document,
+                            'is_new': False,
+                            'combined_document_string': combined_document_string,
+                            'content_hash': content_hash,
+                            'channel_name': channel_name,
+                            'channel_id': channel_id,
+                            'msg_ts': msg_ts,
+                            'start_date': start_date_str,
+                            'end_date': end_date_str,
+                            'message_count': len(formatted_messages),
+                        })
+                        continue
 
                     # Document doesn't exist by unique_identifier_hash
                     # Check if a document with the same content_hash exists (from another connector)
@@ -363,14 +353,7 @@ async def index_slack_messages(
                         documents_skipped += 1
                         continue
 
-                    # Document doesn't exist - create new one
-                    # Process chunks
-                    chunks = await create_document_chunks(combined_document_string)
-                    doc_embedding = config.embedding_model_instance.embed(
-                        combined_document_string
-                    )
-
-                    # Create and store new document
+                    # Create new document with PENDING status (visible in UI immediately)
                     document = Document(
                         search_space_id=search_space_id,
                         title=channel_name,
@@ -378,33 +361,37 @@ async def index_slack_messages(
                         document_metadata={
                             "channel_name": channel_name,
                             "channel_id": channel_id,
-                            "start_date": start_date_str,
-                            "end_date": end_date_str,
-                            "message_count": len(formatted_messages),
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                            "msg_ts": msg_ts,
+                            "connector_id": connector_id,
                         },
-                        content=combined_document_string,
-                        embedding=doc_embedding,
-                        chunks=chunks,
-                        content_hash=content_hash,
+                        content="Pending...",  # Placeholder until processed
+                        content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                         unique_identifier_hash=unique_identifier_hash,
+                        embedding=None,
+                        chunks=[],  # Empty at creation - safe for async
+                        status=DocumentStatus.pending(),  # Pending until processing starts
                         updated_at=get_current_timestamp(),
                         created_by_id=user_id,
                         connector_id=connector_id,
                     )
-
                     session.add(document)
-                    documents_indexed += 1
+                    new_documents_created = True
 
-                    # Batch commit every 10 documents
-                    if documents_indexed % 10 == 0:
-                        logger.info(
-                            f"Committing batch: {documents_indexed} Slack channels processed so far"
-                        )
-                        await session.commit()
+                    messages_to_process.append({
+                        'document': document,
+                        'is_new': True,
+                        'combined_document_string': combined_document_string,
+                        'content_hash': content_hash,
+                        'channel_name': channel_name,
+                        'channel_id': channel_id,
+                        'msg_ts': msg_ts,
+                        'start_date': start_date_str,
+                        'end_date': end_date_str,
+                        'message_count': len(formatted_messages),
+                    })
 
                 logger.info(
-                    f"Successfully indexed new channel {channel_name} with {len(formatted_messages)} messages"
+                    f"Phase 1: Collected {len(formatted_messages)} messages from channel {channel_name}"
                 )
 
             except SlackApiError as slack_error:
@@ -420,43 +407,125 @@ async def index_slack_messages(
                 documents_skipped += 1
                 continue  # Skip this channel and continue with others
 
-        # Update the last_indexed_at timestamp for the connector only if requested
-        # and if we successfully indexed at least one channel
-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+
+        for item in messages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (embeddings, chunks)
+                chunks = await create_document_chunks(item['combined_document_string'])
+                doc_embedding = config.embedding_model_instance.embed(
+                    item['combined_document_string']
+                )
+
+                # Update document to READY with actual content
+                document.title = item['channel_name']
+                document.content = item['combined_document_string']
+                document.content_hash = item['content_hash']
+                document.embedding = doc_embedding
+                document.document_metadata = {
+                    "channel_name": item['channel_name'],
+                    "channel_id": item['channel_id'],
+                    "start_date": item['start_date'],
+                    "end_date": item['end_date'],
+                    "message_count": item['message_count'],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} Slack messages processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                logger.error(
+                    f"Error processing Slack message {item.get('msg_ts', 'Unknown')}: {e!s}",
+                    exc_info=True,
+                )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
-        logger.info(f"Final commit: Total {documents_indexed} Slack channels processed")
-        await session.commit()
+        logger.info(f"Final commit: Total {documents_indexed} Slack messages processed")
+        try:
+            await session.commit()
+            logger.info("Successfully committed all Slack document changes to database")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same message was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
 
-        # Prepare result message
-        result_message = None
-        if skipped_channels:
-            result_message = f"Processed {total_processed} channels. Skipped {len(skipped_channels)} channels: {', '.join(skipped_channels)}"
-        else:
-            result_message = f"Processed {total_processed} channels."
+        # Build warning message if there were issues
+        warning_parts = []
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         # Log success
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed Slack indexing for connector {connector_id}",
             {
-                "channels_processed": total_processed,
+                "channels_processed": len(channels),
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                 "skipped_channels_count": len(skipped_channels),
-                "result_message": result_message,
             },
         )
 
         logger.info(
-            f"Slack indexing completed: {documents_indexed} new channels, {documents_skipped} skipped"
+            f"Slack indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed"
         )
-        return (
-            total_processed,
-            None,
-        )  # Return None on success (result_message is for logging only)
+        return documents_indexed, warning_message
 
     except SQLAlchemyError as db_error:
         await session.rollback()

From 0249ea20a5df4bdfdf890a2de66bac2f8a33a316 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 03:42:03 +0530
Subject: [PATCH 22/36] feat: implement two-phase document indexing for Discord
 and Teams connectors with real-time status updates

---
 .../connector_indexers/discord_indexer.py     | 384 +++++++++++-------
 .../tasks/connector_indexers/teams_indexer.py | 291 +++++++------
 2 files changed, 400 insertions(+), 275 deletions(-)

diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
index f9a6918a7..e5f333531 100644
--- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
@@ -1,5 +1,9 @@
 """
 Discord connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import asyncio
@@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.discord_connector import DiscordConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
     create_document_chunks,
@@ -27,6 +31,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -48,7 +53,11 @@ async def index_discord_messages(
     on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, str | None]:
     """
-    Index Discord messages from all accessible channels.
+    Index Discord messages from the configured guild's channels.
+
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+    - Phase 2: Process each document: pending → processing → ready/failed
 
     Args:
         session: Database session
@@ -113,6 +122,37 @@ async def index_discord_messages(
 
         logger.info(f"Starting Discord indexing for connector {connector_id}")
 
+        # =======================================================================
+        # GUILD FILTERING: Only index the specific guild configured for this connector
+        # =======================================================================
+        # Extract guild_id from connector config (set during OAuth flow)
+        configured_guild_id = connector.config.get("guild_id")
+        configured_guild_name = connector.config.get("guild_name")
+
+        # Legacy connector check - if no guild_id, we need to warn and handle gracefully
+        is_legacy_connector = configured_guild_id is None
+
+        if is_legacy_connector:
+            logger.warning(
+                f"Discord connector {connector_id} has no guild_id configured. "
+                "This is a legacy connector. Please reconnect the Discord server to fix this. "
+                "For now, indexing will be skipped to prevent indexing unwanted servers."
+            )
+            await task_logger.log_task_failure(
+                log_entry,
+                f"Legacy Discord connector {connector_id} missing guild_id",
+                "No guild_id configured. Please reconnect this Discord server.",
+                {"error_type": "MissingGuildId", "is_legacy": True},
+            )
+            return (
+                0,
+                "This Discord connector needs to be reconnected. Please disconnect and reconnect your Discord server to enable indexing.",
+            )
+
+        logger.info(
+            f"Configured to index guild: {configured_guild_name} ({configured_guild_id})"
+        )
+
         # Initialize Discord client with OAuth credentials support
         await task_logger.log_task_progress(
             log_entry,
@@ -255,77 +295,68 @@ async def index_discord_messages(
         try:
             await task_logger.log_task_progress(
                 log_entry,
-                f"Starting Discord bot and fetching guilds for connector {connector_id}",
-                {"stage": "fetch_guilds"},
+                f"Starting Discord bot for connector {connector_id}",
+                {"stage": "bot_initialization"},
             )
 
-            logger.info("Starting Discord bot to fetch guilds")
+            logger.info("Starting Discord bot")
             discord_client._bot_task = asyncio.create_task(discord_client.start_bot())
             await discord_client._wait_until_ready()
 
-            logger.info("Fetching Discord guilds")
-            guilds = await discord_client.get_guilds()
-            logger.info(f"Found {len(guilds)} guilds")
+            # We only process the configured guild, not all guilds
+            logger.info(
+                f"Processing configured guild only: {configured_guild_name} ({configured_guild_id})"
+            )
+
         except Exception as e:
             await task_logger.log_task_failure(
                 log_entry,
-                f"Failed to get Discord guilds for connector {connector_id}",
+                f"Failed to start Discord bot for connector {connector_id}",
                 str(e),
-                {"error_type": "GuildFetchError"},
+                {"error_type": "BotStartError"},
             )
-            logger.error(f"Failed to get Discord guilds: {e!s}", exc_info=True)
+            logger.error(f"Failed to start Discord bot: {e!s}", exc_info=True)
             await discord_client.close_bot()
-            return 0, f"Failed to get Discord guilds: {e!s}"
-
-        if not guilds:
-            await task_logger.log_task_success(
-                log_entry,
-                f"No Discord guilds found for connector {connector_id}",
-                {"guilds_found": 0},
-            )
-            logger.info("No Discord guilds found to index")
-            await discord_client.close_bot()
-            return 0, "No Discord guilds found"
+            return 0, f"Failed to start Discord bot: {e!s}"
 
         # Track results
         documents_indexed = 0
         documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0
         skipped_channels: list[str] = []
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
-        # Process each guild and channel
+        # Use the configured guild info
+        guild_id = configured_guild_id
+        guild_name = configured_guild_name or "Unknown Guild"
+
         await task_logger.log_task_progress(
             log_entry,
-            f"Starting to process {len(guilds)} Discord guilds",
-            {"stage": "process_guilds", "total_guilds": len(guilds)},
+            f"Processing Discord guild: {guild_name}",
+            {"stage": "process_guild", "guild_id": guild_id, "guild_name": guild_name},
         )
 
+        # =======================================================================
+        # PHASE 1: Collect all messages and create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        messages_to_process = []  # List of dicts with document and message data
+        new_documents_created = False
+
         try:
-            for guild in guilds:
-                # Check if it's time for a heartbeat update
-                if (
-                    on_heartbeat_callback
-                    and (time.time() - last_heartbeat_time)
-                    >= HEARTBEAT_INTERVAL_SECONDS
-                ):
-                    await on_heartbeat_callback(documents_indexed)
-                    last_heartbeat_time = time.time()
-                guild_id = guild["id"]
-                guild_name = guild["name"]
-                logger.info(f"Processing guild: {guild_name} ({guild_id})")
-
-                try:
-                    channels = await discord_client.get_text_channels(guild_id)
-                    if not channels:
-                        logger.info(
-                            f"No channels found in guild {guild_name}. Skipping."
-                        )
-                        skipped_channels.append(f"{guild_name} (no channels)")
-                        documents_skipped += 1
-                        continue
+            logger.info(f"Processing guild: {guild_name} ({guild_id})")
 
+            try:
+                channels = await discord_client.get_text_channels(guild_id)
+                if not channels:
+                    logger.info(
+                        f"No channels found in guild {guild_name}. Skipping."
+                    )
+                    skipped_channels.append(f"{guild_name} (no channels)")
+                else:
                     for channel in channels:
                         channel_id = channel["id"]
                         channel_name = channel["name"]
@@ -343,14 +374,12 @@ async def index_discord_messages(
                             skipped_channels.append(
                                 f"{guild_name}#{channel_name} (fetch error)"
                             )
-                            documents_skipped += 1
                             continue
 
                         if not messages:
                             logger.info(
                                 f"No messages found in channel {channel_name} for the specified date range."
                             )
-                            documents_skipped += 1
                             continue
 
                         # Filter/format messages
@@ -365,7 +394,6 @@ async def index_discord_messages(
                             logger.info(
                                 f"No valid messages found in channel {channel_name} after filtering."
                             )
-                            documents_skipped += 1
                             continue
 
                         # Process each message as an individual document (like Slack)
@@ -427,55 +455,27 @@ async def index_discord_messages(
                             if existing_document:
                                 # Document exists - check if content has changed
                                 if existing_document.content_hash == content_hash:
-                                    logger.info(
-                                        f"Document for Discord message {msg_id} in {guild_name}#{channel_name} unchanged. Skipping."
-                                    )
+                                    # Ensure status is ready (might have been stuck in processing/pending)
+                                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                                        existing_document.status = DocumentStatus.ready()
                                     documents_skipped += 1
                                     continue
-                                else:
-                                    # Content has changed - update the existing document
-                                    logger.info(
-                                        f"Content changed for Discord message {msg_id} in {guild_name}#{channel_name}. Updating document."
-                                    )
 
-                                    # Update chunks and embedding
-                                    chunks = await create_document_chunks(
-                                        combined_document_string
-                                    )
-                                    doc_embedding = (
-                                        config.embedding_model_instance.embed(
-                                            combined_document_string
-                                        )
-                                    )
-
-                                    # Update existing document
-                                    existing_document.content = combined_document_string
-                                    existing_document.content_hash = content_hash
-                                    existing_document.embedding = doc_embedding
-                                    existing_document.document_metadata = {
-                                        "guild_name": guild_name,
-                                        "guild_id": guild_id,
-                                        "channel_name": channel_name,
-                                        "channel_id": channel_id,
-                                        "message_id": msg_id,
-                                        "message_timestamp": msg_timestamp,
-                                        "message_user_name": msg_user_name,
-                                        "indexed_at": datetime.now(UTC).strftime(
-                                            "%Y-%m-%d %H:%M:%S"
-                                        ),
-                                    }
-
-                                    # Delete old chunks and add new ones
-                                    existing_document.chunks = chunks
-                                    existing_document.updated_at = (
-                                        get_current_timestamp()
-                                    )
-
-                                    documents_indexed += 1
-                                    logger.info(
-                                        f"Successfully updated Discord message {msg_id}"
-                                    )
-                                    continue
+                                # Queue existing document for update (will be set to processing in Phase 2)
+                                messages_to_process.append({
+                                    'document': existing_document,
+                                    'is_new': False,
+                                    'combined_document_string': combined_document_string,
+                                    'content_hash': content_hash,
+                                    'guild_name': guild_name,
+                                    'guild_id': guild_id,
+                                    'channel_name': channel_name,
+                                    'channel_id': channel_id,
+                                    'message_id': msg_id,
+                                    'message_timestamp': msg_timestamp,
+                                    'message_user_name': msg_user_name,
+                                })
+                                continue
 
                             # Document doesn't exist by unique_identifier_hash
                             # Check if a document with the same content_hash exists (from another connector)
@@ -492,19 +492,11 @@ async def index_discord_messages(
                                     f"(existing document ID: {duplicate_by_content.id}, "
                                     f"type: {duplicate_by_content.document_type}). Skipping."
                                 )
+                                duplicate_content_count += 1
                                 documents_skipped += 1
                                 continue
 
-                            # Document doesn't exist - create new one
-                            # Process chunks
-                            chunks = await create_document_chunks(
-                                combined_document_string
-                            )
-                            doc_embedding = config.embedding_model_instance.embed(
-                                combined_document_string
-                            )
-
-                            # Create and store new document
+                            # Create new document with PENDING status (visible in UI immediately)
                             document = Document(
                                 search_space_id=search_space_id,
                                 title=f"{guild_name}#{channel_name}",
@@ -515,87 +507,171 @@ async def index_discord_messages(
                                     "channel_name": channel_name,
                                     "channel_id": channel_id,
                                     "message_id": msg_id,
-                                    "message_timestamp": msg_timestamp,
-                                    "message_user_name": msg_user_name,
-                                    "indexed_at": datetime.now(UTC).strftime(
-                                        "%Y-%m-%d %H:%M:%S"
-                                    ),
+                                    "connector_id": connector_id,
                                 },
-                                content=combined_document_string,
-                                embedding=doc_embedding,
-                                chunks=chunks,
-                                content_hash=content_hash,
+                                content="Pending...",  # Placeholder until processed
+                                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                                 unique_identifier_hash=unique_identifier_hash,
+                                embedding=None,
+                                chunks=[],  # Empty at creation - safe for async
+                                status=DocumentStatus.pending(),  # Pending until processing starts
                                 updated_at=get_current_timestamp(),
                                 created_by_id=user_id,
                                 connector_id=connector_id,
                             )
-
                             session.add(document)
-                            documents_indexed += 1
+                            new_documents_created = True
 
-                            # Batch commit every 10 documents
-                            if documents_indexed % 10 == 0:
-                                logger.info(
-                                    f"Committing batch: {documents_indexed} Discord messages processed so far"
-                                )
-                                await session.commit()
+                            messages_to_process.append({
+                                'document': document,
+                                'is_new': True,
+                                'combined_document_string': combined_document_string,
+                                'content_hash': content_hash,
+                                'guild_name': guild_name,
+                                'guild_id': guild_id,
+                                'channel_name': channel_name,
+                                'channel_id': channel_id,
+                                'message_id': msg_id,
+                                'message_timestamp': msg_timestamp,
+                                'message_user_name': msg_user_name,
+                            })
 
-                        logger.info(
-                            f"Successfully indexed channel {guild_name}#{channel_name} with {len(formatted_messages)} messages"
-                        )
+            except Exception as e:
+                logger.error(
+                    f"Error processing guild {guild_name}: {e!s}", exc_info=True
+                )
+                skipped_channels.append(f"{guild_name} (processing error)")
 
-                except Exception as e:
-                    logger.error(
-                        f"Error processing guild {guild_name}: {e!s}", exc_info=True
-                    )
-                    skipped_channels.append(f"{guild_name} (processing error)")
-                    documents_skipped += 1
-                    continue
         finally:
             await discord_client.close_bot()
 
-        # Update last_indexed_at only if we indexed at least one
-        if documents_indexed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+
+        for item in messages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (embeddings, chunks)
+                chunks = await create_document_chunks(item['combined_document_string'])
+                doc_embedding = config.embedding_model_instance.embed(
+                    item['combined_document_string']
+                )
+
+                # Update document to READY with actual content
+                document.title = f"{item['guild_name']}#{item['channel_name']}"
+                document.content = item['combined_document_string']
+                document.content_hash = item['content_hash']
+                document.embedding = doc_embedding
+                document.document_metadata = {
+                    "guild_name": item['guild_name'],
+                    "guild_id": item['guild_id'],
+                    "channel_name": item['channel_name'],
+                    "channel_id": item['channel_id'],
+                    "message_id": item['message_id'],
+                    "message_timestamp": item['message_timestamp'],
+                    "message_user_name": item['message_user_name'],
+                    "indexed_at": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} Discord messages processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                logger.error(f"Error processing Discord message: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(
             f"Final commit: Total {documents_indexed} Discord messages processed"
         )
-        await session.commit()
-
-        # Prepare result message
-        result_message = None
-        if skipped_channels:
-            result_message = (
-                f"Processed {documents_indexed} messages. Skipped {len(skipped_channels)} channels: "
-                + ", ".join(skipped_channels)
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Discord document changes to database"
             )
-        else:
-            result_message = f"Processed {documents_indexed} messages."
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        if skipped_channels:
+            warning_parts.append(f"{len(skipped_channels)} channels skipped")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         # Log success
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed Discord indexing for connector {connector_id}",
             {
-                "messages_processed": documents_indexed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
                 "skipped_channels_count": len(skipped_channels),
-                "guilds_processed": len(guilds),
-                "result_message": result_message,
+                "guild_id": guild_id,
+                "guild_name": guild_name,
             },
         )
 
         logger.info(
-            f"Discord indexing completed: {documents_indexed} new messages, {documents_skipped} skipped"
+            f"Discord indexing completed for guild {guild_name}: {documents_indexed} ready, {documents_skipped} skipped, "
+            f"{documents_failed} failed ({duplicate_content_count} duplicate content)"
         )
-        return (
-            documents_indexed,
-            None,
-        )  # Return None on success (result_message is for logging only)
+        return documents_indexed, warning_message
 
     except SQLAlchemyError as db_error:
         await session.rollback()
diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
index d42c5b7f1..27259fd6f 100644
--- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
@@ -1,17 +1,21 @@
 """
 Microsoft Teams connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import time
 from collections.abc import Awaitable, Callable
-from datetime import UTC
+from datetime import UTC, datetime
 
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.teams_history import TeamsHistory
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
     create_document_chunks,
@@ -27,6 +31,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -50,6 +55,10 @@ async def index_teams_messages(
     """
     Index Microsoft Teams messages from all accessible teams and channels.
 
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+    - Phase 2: Process each document: pending → processing → ready/failed
+
     Args:
         session: Database session
         connector_id: ID of the Teams connector
@@ -165,11 +174,16 @@ async def index_teams_messages(
                 f"No Teams found for connector {connector_id}",
                 {"teams_found": 0},
             )
-            return 0, "No Teams found"
+            # CRITICAL: Update timestamp even when no teams found so Electric SQL syncs
+            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await session.commit()
+            return 0, None  # Return None (not error) when no items found
 
         # Track the number of documents indexed
         documents_indexed = 0
         documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0
         skipped_channels = []
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
@@ -182,8 +196,6 @@ async def index_teams_messages(
         )
 
         # Convert date strings to datetime objects for filtering
-        from datetime import datetime
-
         start_datetime = None
         end_datetime = None
         if start_date_str:
@@ -197,16 +209,14 @@ async def index_teams_messages(
                 hour=23, minute=59, second=59, tzinfo=UTC
             )
 
-        # Process each team
-        for team in teams:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
+        # =======================================================================
+        # PHASE 1: Collect all messages and create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        messages_to_process = []  # List of dicts with document and message data
+        new_documents_created = False
 
+        for team in teams:
             team_id = team.get("id")
             team_name = team.get("displayName", "Unknown Team")
 
@@ -239,7 +249,6 @@ async def index_teams_messages(
                                 channel_name,
                                 team_name,
                             )
-                            documents_skipped += 1
                             continue
 
                         # Process each message
@@ -322,60 +331,27 @@ async def index_teams_messages(
                             if existing_document:
                                 # Document exists - check if content has changed
                                 if existing_document.content_hash == content_hash:
-                                    logger.info(
-                                        "Document for Teams message %s in channel %s unchanged. Skipping.",
-                                        message_id,
-                                        channel_name,
-                                    )
+                                    # Ensure status is ready (might have been stuck in processing/pending)
+                                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                                        existing_document.status = DocumentStatus.ready()
                                     documents_skipped += 1
                                     continue
-                                else:
-                                    # Content has changed - update the existing document
-                                    logger.info(
-                                        "Content changed for Teams message %s in channel %s. Updating document.",
-                                        message_id,
-                                        channel_name,
-                                    )
 
-                                    # Update chunks and embedding
-                                    chunks = await create_document_chunks(
-                                        combined_document_string
-                                    )
-                                    doc_embedding = (
-                                        config.embedding_model_instance.embed(
-                                            combined_document_string
-                                        )
-                                    )
-
-                                    # Update existing document
-                                    existing_document.content = combined_document_string
-                                    existing_document.content_hash = content_hash
-                                    existing_document.embedding = doc_embedding
-                                    existing_document.document_metadata = {
-                                        "team_name": team_name,
-                                        "team_id": team_id,
-                                        "channel_name": channel_name,
-                                        "channel_id": channel_id,
-                                        "start_date": start_date_str,
-                                        "end_date": end_date_str,
-                                        "message_count": len(messages),
-                                        "indexed_at": datetime.now().strftime(
-                                            "%Y-%m-%d %H:%M:%S"
-                                        ),
-                                    }
-
-                                    # Delete old chunks and add new ones
-                                    existing_document.chunks = chunks
-                                    existing_document.updated_at = (
-                                        get_current_timestamp()
-                                    )
-
-                                    documents_indexed += 1
-                                    logger.info(
-                                        "Successfully updated Teams message %s",
-                                        message_id,
-                                    )
-                                    continue
+                                # Queue existing document for update (will be set to processing in Phase 2)
+                                messages_to_process.append({
+                                    'document': existing_document,
+                                    'is_new': False,
+                                    'combined_document_string': combined_document_string,
+                                    'content_hash': content_hash,
+                                    'team_name': team_name,
+                                    'team_id': team_id,
+                                    'channel_name': channel_name,
+                                    'channel_id': channel_id,
+                                    'message_id': message_id,
+                                    'start_date': start_date_str,
+                                    'end_date': end_date_str,
+                                })
+                                continue
 
                             # Document doesn't exist by unique_identifier_hash
                             # Check if a document with the same content_hash exists (from another connector)
@@ -395,19 +371,11 @@ async def index_teams_messages(
                                     duplicate_by_content.id,
                                     duplicate_by_content.document_type,
                                 )
+                                duplicate_content_count += 1
                                 documents_skipped += 1
                                 continue
 
-                            # Document doesn't exist - create new one
-                            # Process chunks
-                            chunks = await create_document_chunks(
-                                combined_document_string
-                            )
-                            doc_embedding = config.embedding_model_instance.embed(
-                                combined_document_string
-                            )
-
-                            # Create and store new document
+                            # Create new document with PENDING status (visible in UI immediately)
                             document = Document(
                                 search_space_id=search_space_id,
                                 title=f"{team_name} - {channel_name}",
@@ -417,40 +385,34 @@ async def index_teams_messages(
                                     "team_id": team_id,
                                     "channel_name": channel_name,
                                     "channel_id": channel_id,
-                                    "start_date": start_date_str,
-                                    "end_date": end_date_str,
-                                    "message_count": len(messages),
-                                    "indexed_at": datetime.now().strftime(
-                                        "%Y-%m-%d %H:%M:%S"
-                                    ),
+                                    "connector_id": connector_id,
                                 },
-                                content=combined_document_string,
-                                embedding=doc_embedding,
-                                chunks=chunks,
-                                content_hash=content_hash,
+                                content="Pending...",  # Placeholder until processed
+                                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                                 unique_identifier_hash=unique_identifier_hash,
+                                embedding=None,
+                                chunks=[],  # Empty at creation - safe for async
+                                status=DocumentStatus.pending(),  # Pending until processing starts
                                 updated_at=get_current_timestamp(),
                                 created_by_id=user_id,
                                 connector_id=connector_id,
                             )
-
                             session.add(document)
-                            documents_indexed += 1
+                            new_documents_created = True
 
-                            # Batch commit every 10 documents
-                            if documents_indexed % 10 == 0:
-                                logger.info(
-                                    "Committing batch: %s Teams messages processed so far",
-                                    documents_indexed,
-                                )
-                                await session.commit()
-
-                        logger.info(
-                            "Successfully indexed channel %s in team %s with %s messages",
-                            channel_name,
-                            team_name,
-                            len(messages),
-                        )
+                            messages_to_process.append({
+                                'document': document,
+                                'is_new': True,
+                                'combined_document_string': combined_document_string,
+                                'content_hash': content_hash,
+                                'team_name': team_name,
+                                'team_id': team_id,
+                                'channel_name': channel_name,
+                                'channel_id': channel_id,
+                                'message_id': message_id,
+                                'start_date': start_date_str,
+                                'end_date': end_date_str,
+                            })
 
                     except Exception as e:
                         logger.error(
@@ -462,54 +424,141 @@ async def index_teams_messages(
                         skipped_channels.append(
                             f"{team_name}/{channel_name} (processing error)"
                         )
-                        documents_skipped += 1
                         continue
 
             except Exception as e:
                 logger.error("Error processing team %s: %s", team_name, str(e))
                 continue
 
-        # Update the last_indexed_at timestamp for the connector only if requested
-        # and if we successfully indexed at least one document
-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(messages_to_process)} documents")
+
+        for item in messages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (embeddings, chunks)
+                chunks = await create_document_chunks(item['combined_document_string'])
+                doc_embedding = config.embedding_model_instance.embed(
+                    item['combined_document_string']
+                )
+
+                # Update document to READY with actual content
+                document.title = f"{item['team_name']} - {item['channel_name']}"
+                document.content = item['combined_document_string']
+                document.content_hash = item['content_hash']
+                document.embedding = doc_embedding
+                document.document_metadata = {
+                    "team_name": item['team_name'],
+                    "team_id": item['team_id'],
+                    "channel_name": item['channel_name'],
+                    "channel_id": item['channel_id'],
+                    "start_date": item['start_date'],
+                    "end_date": item['end_date'],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        "Committing batch: %s Teams messages processed so far",
+                        documents_indexed,
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                logger.error(f"Error processing Teams message: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(
             "Final commit: Total %s Teams messages processed", documents_indexed
         )
-        await session.commit()
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Teams document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
 
-        # Prepare result message
-        result_message = None
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
         if skipped_channels:
-            result_message = f"Processed {total_processed} messages. Skipped {len(skipped_channels)} channels: {', '.join(skipped_channels)}"
-        else:
-            result_message = f"Processed {total_processed} messages."
+            warning_parts.append(f"{len(skipped_channels)} channels skipped")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         # Log success
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed Teams indexing for connector {connector_id}",
             {
-                "messages_processed": total_processed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
                 "skipped_channels_count": len(skipped_channels),
-                "result_message": result_message,
             },
         )
 
         logger.info(
-            "Teams indexing completed: %s new messages, %s skipped",
+            "Teams indexing completed: %s ready, %s skipped, %s failed "
+            "(%s duplicate content)",
             documents_indexed,
             documents_skipped,
+            documents_failed,
+            duplicate_content_count,
         )
-        return (
-            total_processed,
-            None,
-        )  # Return None on success (result_message is for logging only)
+        return documents_indexed, warning_message
 
     except SQLAlchemyError as db_error:
         await session.rollback()

From 1d870e45a48b12b8577d1bc73f7630fe0ce0d325 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 03:54:24 +0530
Subject: [PATCH 23/36] feat: implement two-phase document indexing for
 Confluence and Jira connectors with real-time status updates

---
 .../connector_indexers/confluence_indexer.py  | 336 ++++++++++--------
 .../tasks/connector_indexers/jira_indexer.py  | 328 +++++++++--------
 2 files changed, 369 insertions(+), 295 deletions(-)

diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
index 74b4cc23d..7fd842996 100644
--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@@ -1,5 +1,9 @@
 """
 Confluence connector indexer.
+
+Provides real-time document status updates during indexing using a two-phase approach:
+- Phase 1: Create new documents with PENDING status (visible in UI immediately) and queue changed existing ones for update
+- Phase 2: Process each document one by one (PENDING → PROCESSING → READY/FAILED)
 """
 
 import contextlib
@@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.confluence_history import ConfluenceHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -29,6 +33,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -180,22 +185,22 @@ async def index_confluence_pages(
                     await confluence_client.close()
             return 0, f"Error fetching Confluence pages: {e!s}"
 
-        # Process and index each page
+        # =======================================================================
+        # PHASE 1: Analyze all pages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
         documents_indexed = 0
-        skipped_pages = []
         documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
+        pages_to_process = []  # List of dicts with document and page data
+        new_documents_created = False
+
         for page in pages:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
             try:
                 page_id = page.get("id")
                 page_title = page.get("title", "")
@@ -205,7 +210,6 @@ async def index_confluence_pages(
                     logger.warning(
                         f"Skipping page with missing ID or title: {page_id or 'Unknown'}"
                     )
-                    skipped_pages.append(f"{page_title or 'Unknown'} (missing data)")
                     documents_skipped += 1
                     continue
 
@@ -236,7 +240,6 @@ async def index_confluence_pages(
 
                 if not full_content.strip():
                     logger.warning(f"Skipping page with no content: {page_title}")
-                    skipped_pages.append(f"{page_title} (no content)")
                     documents_skipped += 1
                     continue
 
@@ -258,74 +261,25 @@ async def index_confluence_pages(
                 if existing_document:
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Confluence page {page_title} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Confluence page {page_title}. Updating document."
-                        )
 
-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "page_title": page_title,
-                                "page_id": page_id,
-                                "space_id": space_id,
-                                "comment_count": comment_count,
-                                "document_type": "Confluence Page",
-                                "connector_type": "Confluence",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                full_content, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = f"Confluence Page: {page_title}\n\nSpace ID: {space_id}\n\n"
-                            if page_content:
-                                content_preview = page_content[:1000]
-                                if len(page_content) > 1000:
-                                    content_preview += "..."
-                                summary_content += (
-                                    f"Content Preview: {content_preview}\n\n"
-                                )
-                            summary_content += f"Comments: {comment_count}"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(full_content)
-
-                        # Update existing document
-                        existing_document.title = page_title
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
-                            "page_id": page_id,
-                            "page_title": page_title,
-                            "space_id": space_id,
-                            "comment_count": comment_count,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(
-                            f"Successfully updated Confluence page {page_title}"
-                        )
-                        continue
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    pages_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'full_content': full_content,
+                        'page_content': page_content,
+                        'content_hash': content_hash,
+                        'page_id': page_id,
+                        'page_title': page_title,
+                        'space_id': space_id,
+                        'comment_count': comment_count,
+                    })
+                    continue
 
                 # Document doesn't exist by unique_identifier_hash
                 # Check if a document with the same content_hash exists (from another connector)
@@ -340,51 +294,11 @@ async def index_confluence_pages(
                         f"(existing document ID: {duplicate_by_content.id}, "
                         f"type: {duplicate_by_content.document_type}). Skipping."
                     )
+                    duplicate_content_count += 1
                     documents_skipped += 1
                     continue
 
-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "page_title": page_title,
-                        "page_id": page_id,
-                        "space_id": space_id,
-                        "comment_count": comment_count,
-                        "document_type": "Confluence Page",
-                        "connector_type": "Confluence",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        full_content, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    summary_content = (
-                        f"Confluence Page: {page_title}\n\nSpace ID: {space_id}\n\n"
-                    )
-                    if page_content:
-                        # Take first 500 characters of content for summary
-                        content_preview = page_content[:1000]
-                        if len(page_content) > 1000:
-                            content_preview += "..."
-                        summary_content += f"Content Preview: {content_preview}\n\n"
-                    summary_content += f"Comments: {comment_count}"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                # Process chunks - using the full page content with comments
-                chunks = await create_document_chunks(full_content)
-
-                # Create and store new document
-                logger.info(f"Creating new document for page {page_title}")
+                # Create new document with PENDING status (visible in UI immediately)
                 document = Document(
                     search_space_id=search_space_id,
                     title=page_title,
@@ -394,23 +308,122 @@ async def index_confluence_pages(
                         "page_title": page_title,
                         "space_id": space_id,
                         "comment_count": comment_count,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                     },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                     unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                     updated_at=get_current_timestamp(),
                     created_by_id=user_id,
                     connector_id=connector_id,
                 )
-
                 session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new page {page_title}")
+                new_documents_created = True
 
-                # Batch commit every 10 documents
+                pages_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'full_content': full_content,
+                    'page_content': page_content,
+                    'content_hash': content_hash,
+                    'page_id': page_id,
+                    'page_title': page_title,
+                    'space_id': space_id,
+                    'comment_count': comment_count,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(pages_to_process)} documents")
+
+        for item in pages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata = {
+                        "page_title": item['page_title'],
+                        "page_id": item['page_id'],
+                        "space_id": item['space_id'],
+                        "comment_count": item['comment_count'],
+                        "document_type": "Confluence Page",
+                        "connector_type": "Confluence",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item['full_content'], user_llm, document_metadata
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    summary_content = (
+                        f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n"
+                    )
+                    if item['page_content']:
+                        # Take first 1000 characters of content for summary
+                        content_preview = item['page_content'][:1000]
+                        if len(item['page_content']) > 1000:
+                            content_preview += "..."
+                        summary_content += f"Content Preview: {content_preview}\n\n"
+                    summary_content += f"Comments: {item['comment_count']}"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                # Process chunks - using the full page content with comments
+                chunks = await create_document_chunks(item['full_content'])
+
+                # Update document to READY with actual content
+                document.title = item['page_title']
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "page_id": item['page_id'],
+                    "page_title": item['page_title'],
+                    "space_id": item['space_id'],
+                    "comment_count": item['comment_count'],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                 if documents_indexed % 10 == 0:
                     logger.info(
                         f"Committing batch: {documents_indexed} Confluence pages processed so far"
@@ -419,53 +432,78 @@ async def index_confluence_pages(
 
             except Exception as e:
                 logger.error(
-                    f"Error processing page {page.get('title', 'Unknown')}: {e!s}",
+                    f"Error processing page {item.get('page_title', 'Unknown')}: {e!s}",
                     exc_info=True,
                 )
-                skipped_pages.append(
-                    f"{page.get('title', 'Unknown')} (processing error)"
-                )
-                documents_skipped += 1
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
                 continue  # Skip this page and continue with others
 
-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if update_last_indexed:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
-        # Final commit for any remaining documents not yet committed in batches
+        # Final commit to ensure all documents are persisted (safety net)
         logger.info(
             f"Final commit: Total {documents_indexed} Confluence pages processed"
         )
-        await session.commit()
-        logger.info(
-            "Successfully committed all Confluence document changes to database"
-        )
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Confluence document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same page was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         # Log success
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed Confluence indexing for connector {connector_id}",
             {
-                "pages_processed": total_processed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
-                "skipped_pages_count": len(skipped_pages),
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
             },
         )
 
         logger.info(
-            f"Confluence indexing completed: {documents_indexed} new pages, {documents_skipped} skipped"
+            f"Confluence indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
         )
 
         # Close the client connection
         if confluence_client:
             await confluence_client.close()
 
-        return (
-            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+        return documents_indexed, warning_message
 
     except SQLAlchemyError as db_error:
         await session.rollback()
diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
index 508834b4f..038df0f46 100644
--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@@ -1,5 +1,9 @@
 """
 Jira connector indexer.
+
+Provides real-time document status updates during indexing using a two-phase approach:
+- Phase 1: Create new documents with PENDING status (visible in UI immediately) and queue changed existing ones for update
+- Phase 2: Process each document one by one (PENDING → PROCESSING → READY/FAILED)
 """
 
 import contextlib
@@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.jira_history import JiraHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -29,6 +33,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -174,22 +179,22 @@ async def index_jira_issues(
             logger.error(f"Error fetching Jira issues: {e!s}", exc_info=True)
             return 0, f"Error fetching Jira issues: {e!s}"
 
-        # Process and index each issue
+        # =======================================================================
+        # PHASE 1: Analyze all issues, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
         documents_indexed = 0
-        skipped_issues = []
         documents_skipped = 0
+        documents_failed = 0
+        duplicate_content_count = 0
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
+        issues_to_process = []  # List of dicts with document and issue data
+        new_documents_created = False
+
         for issue in issues:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
             try:
                 issue_id = issue.get("key")
                 issue_identifier = issue.get("key", "")
@@ -199,9 +204,6 @@ async def index_jira_issues(
                     logger.warning(
                         f"Skipping issue with missing ID or title: {issue_id or 'Unknown'}"
                     )
-                    skipped_issues.append(
-                        f"{issue_identifier or 'Unknown'} (missing data)"
-                    )
                     documents_skipped += 1
                     continue
 
@@ -215,7 +217,6 @@ async def index_jira_issues(
                     logger.warning(
                         f"Skipping issue with no content: {issue_identifier} - {issue_title}"
                     )
-                    skipped_issues.append(f"{issue_identifier} (no content)")
                     documents_skipped += 1
                     continue
 
@@ -237,71 +238,25 @@ async def index_jira_issues(
                 if existing_document:
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
-                        logger.info(
-                            f"Document for Jira issue {issue_identifier} unchanged. Skipping."
-                        )
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Jira issue {issue_identifier}. Updating document."
-                        )
 
-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "issue_key": issue_identifier,
-                                "issue_title": issue_title,
-                                "status": formatted_issue.get("status", "Unknown"),
-                                "priority": formatted_issue.get("priority", "Unknown"),
-                                "comment_count": comment_count,
-                                "document_type": "Jira Issue",
-                                "connector_type": "Jira",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                issue_content, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = f"Jira Issue {issue_identifier}: {issue_title}\n\nStatus: {formatted_issue.get('status', 'Unknown')}\n\n"
-                            if formatted_issue.get("description"):
-                                summary_content += f"Description: {formatted_issue.get('description')}\n\n"
-                            summary_content += f"Comments: {comment_count}"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(issue_content)
-
-                        # Update existing document
-                        existing_document.title = f"{issue_identifier}: {issue_title}"
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
-                            "issue_id": issue_id,
-                            "issue_identifier": issue_identifier,
-                            "issue_title": issue_title,
-                            "state": formatted_issue.get("status", "Unknown"),
-                            "comment_count": comment_count,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(
-                            f"Successfully updated Jira issue {issue_identifier}"
-                        )
-                        continue
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    issues_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'issue_content': issue_content,
+                        'content_hash': content_hash,
+                        'issue_id': issue_id,
+                        'issue_identifier': issue_identifier,
+                        'issue_title': issue_title,
+                        'formatted_issue': formatted_issue,
+                        'comment_count': comment_count,
+                    })
+                    continue
 
                 # Document doesn't exist by unique_identifier_hash
                 # Check if a document with the same content_hash exists (from another connector)
@@ -316,50 +271,11 @@ async def index_jira_issues(
                         f"(existing document ID: {duplicate_by_content.id}, "
                         f"type: {duplicate_by_content.document_type}). Skipping."
                     )
+                    duplicate_content_count += 1
                     documents_skipped += 1
                     continue
 
-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "issue_key": issue_identifier,
-                        "issue_title": issue_title,
-                        "status": formatted_issue.get("status", "Unknown"),
-                        "priority": formatted_issue.get("priority", "Unknown"),
-                        "comment_count": comment_count,
-                        "document_type": "Jira Issue",
-                        "connector_type": "Jira",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        issue_content, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    summary_content = f"Jira Issue {issue_identifier}: {issue_title}\n\nStatus: {formatted_issue.get('status', 'Unknown')}\n\n"
-                    if formatted_issue.get("description"):
-                        summary_content += (
-                            f"Description: {formatted_issue.get('description')}\n\n"
-                        )
-                    summary_content += f"Comments: {comment_count}"
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                # Process chunks - using the full issue content with comments
-                chunks = await create_document_chunks(issue_content)
-
-                # Create and store new document
-                logger.info(
-                    f"Creating new document for issue {issue_identifier} - {issue_title}"
-                )
+                # Create new document with PENDING status (visible in UI immediately)
                 document = Document(
                     search_space_id=search_space_id,
                     title=f"{issue_identifier}: {issue_title}",
@@ -370,25 +286,120 @@ async def index_jira_issues(
                         "issue_title": issue_title,
                         "state": formatted_issue.get("status", "Unknown"),
                         "comment_count": comment_count,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                     },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                     unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                     updated_at=get_current_timestamp(),
                     created_by_id=user_id,
                     connector_id=connector_id,
                 )
-
                 session.add(document)
-                documents_indexed += 1
-                logger.info(
-                    f"Successfully indexed new issue {issue_identifier} - {issue_title}"
+                new_documents_created = True
+
+                issues_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'issue_content': issue_content,
+                    'content_hash': content_hash,
+                    'issue_id': issue_id,
+                    'issue_identifier': issue_identifier,
+                    'issue_title': issue_title,
+                    'formatted_issue': formatted_issue,
+                    'comment_count': comment_count,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(issues_to_process)} documents")
+
+        for item in issues_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
                 )
 
-                # Batch commit every 10 documents
+                if user_llm:
+                    document_metadata = {
+                        "issue_key": item['issue_identifier'],
+                        "issue_title": item['issue_title'],
+                        "status": item['formatted_issue'].get("status", "Unknown"),
+                        "priority": item['formatted_issue'].get("priority", "Unknown"),
+                        "comment_count": item['comment_count'],
+                        "document_type": "Jira Issue",
+                        "connector_type": "Jira",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item['issue_content'], user_llm, document_metadata
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n"
+                    if item['formatted_issue'].get("description"):
+                        summary_content += (
+                            f"Description: {item['formatted_issue'].get('description')}\n\n"
+                        )
+                    summary_content += f"Comments: {item['comment_count']}"
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                # Process chunks - using the full issue content with comments
+                chunks = await create_document_chunks(item['issue_content'])
+
+                # Update document to READY with actual content
+                document.title = f"{item['issue_identifier']}: {item['issue_title']}"
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "issue_id": item['issue_id'],
+                    "issue_identifier": item['issue_identifier'],
+                    "issue_title": item['issue_title'],
+                    "state": item['formatted_issue'].get("status", "Unknown"),
+                    "comment_count": item['comment_count'],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                 if documents_indexed % 10 == 0:
                     logger.info(
                         f"Committing batch: {documents_indexed} Jira issues processed so far"
@@ -397,48 +408,73 @@ async def index_jira_issues(
 
             except Exception as e:
                 logger.error(
-                    f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}",
+                    f"Error processing issue {item.get('issue_identifier', 'Unknown')}: {e!s}",
                     exc_info=True,
                 )
-                skipped_issues.append(
-                    f"{issue.get('identifier', 'Unknown')} (processing error)"
-                )
-                documents_skipped += 1
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
                 continue  # Skip this issue and continue with others
 
-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if update_last_indexed:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Call this even when 0 documents were indexed so Electric SQL syncs and the UI
+        # shows "Last indexed" instead of "Never indexed" (update_last_indexed is still passed through)
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
-        # Final commit for any remaining documents not yet committed in batches
+        # Final commit to ensure all documents are persisted (safety net)
         logger.info(f"Final commit: Total {documents_indexed} Jira issues processed")
-        await session.commit()
-        logger.info("Successfully committed all JIRA document changes to database")
+        try:
+            await session.commit()
+            logger.info("Successfully committed all JIRA document changes to database")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same issue was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - earlier commits stay persisted; this rollback only discards changes not yet committed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         # Log success
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed JIRA indexing for connector {connector_id}",
             {
-                "issues_processed": total_processed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
-                "skipped_issues_count": len(skipped_issues),
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
             },
         )
 
         logger.info(
-            f"JIRA indexing completed: {documents_indexed} new issues, {documents_skipped} skipped"
+            f"JIRA indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed "
+            f"({duplicate_content_count} duplicate content)"
         )
 
         # Clean up the connector
         await jira_client.close()
 
-        return (
-            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+        return documents_indexed, warning_message
 
     except SQLAlchemyError as db_error:
         await session.rollback()

From 108e8c960ff68e7954042334de1c57675f467192 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 03:54:38 +0530
Subject: [PATCH 24/36] fix: adjust opacity of clock icon in status indicator
 for better visibility

---
 .../documents/(manage)/components/DocumentsTableShell.tsx       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index beb808191..fb0d72fae 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -42,7 +42,7 @@ function StatusIndicator({ status }: { status?: DocumentStatus }) {
 				<Tooltip>
 					<TooltipTrigger asChild>
 						<div className="flex items-center justify-center">
-							<Clock className="h-5 w-5 text-muted-foreground" />
+							<Clock className="h-5 w-5 text-muted-foreground/60" />
 						</div>
 					</TooltipTrigger>
 					<TooltipContent side="top">Pending - waiting to be synced</TooltipContent>

From bfa3be655ef9a9f5ab78595f6258b10a6053bef4 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 04:06:14 +0530
Subject: [PATCH 25/36] feat: implement two-phase document indexing for ClickUp
 and GitHub connectors with real-time status updates

---
 .../connector_indexers/clickup_indexer.py     | 310 ++++++++-----
 .../connector_indexers/github_indexer.py      | 436 ++++++++++--------
 2 files changed, 440 insertions(+), 306 deletions(-)

diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
index 2b8789e0c..934e56744 100644
--- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
@@ -1,5 +1,9 @@
 """
 ClickUp connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import contextlib
@@ -12,7 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.clickup_history import ClickUpHistoryConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -28,6 +32,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -141,10 +146,18 @@ async def index_clickup_tasks(
 
         documents_indexed = 0
         documents_skipped = 0
+        documents_failed = 0
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
+        # =======================================================================
+        # PHASE 1: Collect all tasks and create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        tasks_to_process = []  # List of dicts with document and task data
+        new_documents_created = False
+
         # Iterate workspaces and fetch tasks
         for workspace in workspaces:
             workspace_id = workspace.get("id")
@@ -183,15 +196,6 @@ async def index_clickup_tasks(
             )
 
             for task in tasks:
-                # Check if it's time for a heartbeat update
-                if (
-                    on_heartbeat_callback
-                    and (time.time() - last_heartbeat_time)
-                    >= HEARTBEAT_INTERVAL_SECONDS
-                ):
-                    await on_heartbeat_callback(documents_indexed)
-                    last_heartbeat_time = time.time()
-
                 try:
                     task_id = task.get("id")
                     task_name = task.get("name", "Untitled Task")
@@ -255,74 +259,35 @@ async def index_clickup_tasks(
                     if existing_document:
                         # Document exists - check if content has changed
                         if existing_document.content_hash == content_hash:
+                            # Ensure status is ready (might have been stuck in processing/pending)
+                            if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                                existing_document.status = DocumentStatus.ready()
                             logger.info(
                                 f"Document for ClickUp task {task_name} unchanged. Skipping."
                             )
                             documents_skipped += 1
                             continue
                         else:
-                            # Content has changed - update the existing document
+                            # Queue existing document for update (will be set to processing in Phase 2)
                             logger.info(
-                                f"Content changed for ClickUp task {task_name}. Updating document."
-                            )
-
-                            # Generate summary with metadata
-                            user_llm = await get_user_long_context_llm(
-                                session, user_id, search_space_id
-                            )
-
-                            if user_llm:
-                                document_metadata = {
-                                    "task_id": task_id,
-                                    "task_name": task_name,
-                                    "task_status": task_status,
-                                    "task_priority": task_priority,
-                                    "task_list": task_list_name,
-                                    "task_space": task_space_name,
-                                    "assignees": len(task_assignees),
-                                    "document_type": "ClickUp Task",
-                                    "connector_type": "ClickUp",
-                                }
-                                (
-                                    summary_content,
-                                    summary_embedding,
-                                ) = await generate_document_summary(
-                                    task_content, user_llm, document_metadata
-                                )
-                            else:
-                                summary_content = task_content
-                                summary_embedding = (
-                                    config.embedding_model_instance.embed(task_content)
-                                )
-
-                            # Process chunks
-                            chunks = await create_document_chunks(task_content)
-
-                            # Update existing document
-                            existing_document.title = task_name
-                            existing_document.content = summary_content
-                            existing_document.content_hash = content_hash
-                            existing_document.embedding = summary_embedding
-                            existing_document.document_metadata = {
-                                "task_id": task_id,
-                                "task_name": task_name,
-                                "task_status": task_status,
-                                "task_priority": task_priority,
-                                "task_assignees": task_assignees,
-                                "task_due_date": task_due_date,
-                                "task_created": task_created,
-                                "task_updated": task_updated,
-                                "indexed_at": datetime.now().strftime(
-                                    "%Y-%m-%d %H:%M:%S"
-                                ),
-                            }
-                            existing_document.chunks = chunks
-                            existing_document.updated_at = get_current_timestamp()
-
-                            documents_indexed += 1
-                            logger.info(
-                                f"Successfully updated ClickUp task {task_name}"
+                                f"Content changed for ClickUp task {task_name}. Queuing for update."
                             )
+                            tasks_to_process.append({
+                                'document': existing_document,
+                                'is_new': False,
+                                'task_content': task_content,
+                                'content_hash': content_hash,
+                                'task_id': task_id,
+                                'task_name': task_name,
+                                'task_status': task_status,
+                                'task_priority': task_priority,
+                                'task_list_name': task_list_name,
+                                'task_space_name': task_space_name,
+                                'task_assignees': task_assignees,
+                                'task_due_date': task_due_date,
+                                'task_created': task_created,
+                                'task_updated': task_updated,
+                            })
                             continue
 
                     # Document doesn't exist by unique_identifier_hash
@@ -341,39 +306,7 @@ async def index_clickup_tasks(
                         documents_skipped += 1
                         continue
 
-                    # Document doesn't exist - create new one
-                    # Generate summary with metadata
-                    user_llm = await get_user_long_context_llm(
-                        session, user_id, search_space_id
-                    )
-
-                    if user_llm:
-                        document_metadata = {
-                            "task_id": task_id,
-                            "task_name": task_name,
-                            "task_status": task_status,
-                            "task_priority": task_priority,
-                            "task_list": task_list_name,
-                            "task_space": task_space_name,
-                            "assignees": len(task_assignees),
-                            "document_type": "ClickUp Task",
-                            "connector_type": "ClickUp",
-                        }
-                        (
-                            summary_content,
-                            summary_embedding,
-                        ) = await generate_document_summary(
-                            task_content, user_llm, document_metadata
-                        )
-                    else:
-                        # Fallback to simple summary if no LLM configured
-                        summary_content = task_content
-                        summary_embedding = config.embedding_model_instance.embed(
-                            task_content
-                        )
-
-                    chunks = await create_document_chunks(task_content)
-
+                    # Create new document with PENDING status (visible in UI immediately)
                     document = Document(
                         search_space_id=search_space_id,
                         title=task_name,
@@ -387,44 +320,174 @@ async def index_clickup_tasks(
                             "task_due_date": task_due_date,
                             "task_created": task_created,
                             "task_updated": task_updated,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                            "connector_id": connector_id,
                         },
-                        content=summary_content,
-                        content_hash=content_hash,
+                        content="Pending...",  # Placeholder until processed
+                        content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                         unique_identifier_hash=unique_identifier_hash,
-                        embedding=summary_embedding,
-                        chunks=chunks,
+                        embedding=None,
+                        chunks=[],  # Empty at creation - safe for async
+                        status=DocumentStatus.pending(),  # Pending until processing starts
                         updated_at=get_current_timestamp(),
                         created_by_id=user_id,
                         connector_id=connector_id,
                     )
-
                     session.add(document)
-                    documents_indexed += 1
-                    logger.info(f"Successfully indexed new task {task_name}")
+                    new_documents_created = True
 
-                    # Batch commit every 10 documents
-                    if documents_indexed % 10 == 0:
-                        logger.info(
-                            f"Committing batch: {documents_indexed} ClickUp tasks processed so far"
-                        )
-                        await session.commit()
+                    tasks_to_process.append({
+                        'document': document,
+                        'is_new': True,
+                        'task_content': task_content,
+                        'content_hash': content_hash,
+                        'task_id': task_id,
+                        'task_name': task_name,
+                        'task_status': task_status,
+                        'task_priority': task_priority,
+                        'task_list_name': task_list_name,
+                        'task_space_name': task_space_name,
+                        'task_assignees': task_assignees,
+                        'task_due_date': task_due_date,
+                        'task_created': task_created,
+                        'task_updated': task_updated,
+                    })
 
                 except Exception as e:
                     logger.error(
-                        f"Error processing task {task.get('name', 'Unknown')}: {e!s}",
+                        f"Error in Phase 1 for task {task.get('name', 'Unknown')}: {e!s}",
                         exc_info=True,
                     )
-                    documents_skipped += 1
+                    documents_failed += 1
+                    continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending (new) or its prior status (queued update) → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(tasks_to_process)} documents")
+
+        for item in tasks_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "task_id": item['task_id'],
+                        "task_name": item['task_name'],
+                        "task_status": item['task_status'],
+                        "task_priority": item['task_priority'],
+                        "task_list": item['task_list_name'],
+                        "task_space": item['task_space_name'],
+                        "assignees": len(item['task_assignees']),
+                        "document_type": "ClickUp Task",
+                        "connector_type": "ClickUp",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item['task_content'], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    summary_content = item['task_content']
+                    summary_embedding = config.embedding_model_instance.embed(
+                        item['task_content']
+                    )
+
+                chunks = await create_document_chunks(item['task_content'])
+
+                # Update document to READY with actual content
+                document.title = item['task_name']
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "task_id": item['task_id'],
+                    "task_name": item['task_name'],
+                    "task_status": item['task_status'],
+                    "task_priority": item['task_priority'],
+                    "task_assignees": item['task_assignees'],
+                    "task_due_date": item['task_due_date'],
+                    "task_created": item['task_created'],
+                    "task_updated": item['task_updated'],
+                    "connector_id": connector_id,
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if documents_indexed % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} ClickUp tasks processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as e:
+                logger.error(
+                    f"Error processing task {item.get('task_name', 'Unknown')}: {e!s}",
+                    exc_info=True,
+                )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
+                continue
 
         total_processed = documents_indexed
 
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Call this even when 0 documents were indexed so Electric SQL syncs and the UI
+        # shows "Last indexed" instead of "Never indexed" (update_last_indexed is still passed through)
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(f"Final commit: Total {documents_indexed} ClickUp tasks processed")
-        await session.commit()
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all ClickUp document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same task was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - earlier commits stay persisted; this rollback only discards changes not yet committed
+            else:
+                raise
 
         await task_logger.log_task_success(
             log_entry,
@@ -433,11 +496,12 @@ async def index_clickup_tasks(
                 "pages_processed": total_processed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
             },
         )
 
         logger.info(
-            f"clickup indexing completed: {documents_indexed} new tasks, {documents_skipped} skipped"
+            f"clickup indexing completed: {documents_indexed} ready, {documents_skipped} skipped, {documents_failed} failed"
         )
 
         # Close client connection
diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
index 848db7623..b37989a84 100644
--- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
@@ -3,6 +3,10 @@ GitHub connector indexer using gitingest.
 
 This indexer processes entire repository digests in one pass, dramatically
 reducing LLM API calls compared to the previous file-by-file approach.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create new documents with 'pending' status (visible in UI immediately); queue changed ones
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import time
@@ -14,7 +18,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.github_connector import GitHubConnector, RepositoryDigest
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -30,6 +34,8 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
+    update_connector_last_indexed,
 )
 
 # Type hint for heartbeat callback
@@ -164,7 +170,7 @@ async def index_github_repos(
             )
             return 0, f"Failed to initialize GitHub client: {e!s}"
 
-        # 4. Process each repository with gitingest
+        # 4. Process each repository with gitingest using 2-phase approach
         await task_logger.log_task_progress(
             log_entry,
             f"Starting gitingest processing for {len(repo_full_names_to_index)} repositories",
@@ -181,24 +187,25 @@ async def index_github_repos(
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
         documents_indexed = 0
+        documents_skipped = 0
+        documents_failed = 0
+
+        # =======================================================================
+        # PHASE 1: Analyze all repos and create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        repos_to_process = []  # List of dicts with document and digest data
+        new_documents_created = False
 
         for repo_full_name in repo_full_names_to_index:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
             if not repo_full_name or not isinstance(repo_full_name, str):
                 logger.warning(f"Skipping invalid repository entry: {repo_full_name}")
                 continue
 
-            logger.info(f"Ingesting repository: {repo_full_name}")
-
             try:
+                logger.info(f"Phase 1: Analyzing repository: {repo_full_name}")
+
                 # Run gitingest via subprocess (isolated from event loop)
-                # Using to_thread to not block the async database operations
                 import asyncio
 
                 digest = await asyncio.to_thread(
@@ -212,30 +219,248 @@ async def index_github_repos(
                     errors.append(f"No digest for {repo_full_name}")
                     continue
 
-                # Process the digest and create documents
-                docs_created = await _process_repository_digest(
-                    session=session,
-                    digest=digest,
-                    search_space_id=search_space_id,
-                    user_id=user_id,
-                    task_logger=task_logger,
-                    log_entry=log_entry,
-                    connector_id=connector_id,
+                # Generate unique identifier based on repo name
+                unique_identifier_hash = generate_unique_identifier_hash(
+                    DocumentType.GITHUB_CONNECTOR, repo_full_name, search_space_id
                 )
 
-                documents_processed += docs_created
-                logger.info(
-                    f"Created {docs_created} documents from repository: {repo_full_name}"
+                # Generate content hash from digest
+                full_content = digest.full_digest
+                content_hash = generate_content_hash(full_content, search_space_id)
+
+                # Check if document with this unique identifier already exists
+                existing_document = await check_document_by_unique_identifier(
+                    session, unique_identifier_hash
                 )
 
+                if existing_document:
+                    # Document exists - check if content has changed
+                    if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
+                        logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
+                        documents_skipped += 1
+                        continue
+
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    logger.info(
+                        f"Content changed for repository {repo_full_name}. Queuing for update."
+                    )
+                    repos_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'digest': digest,
+                        'content_hash': content_hash,
+                        'repo_full_name': repo_full_name,
+                        'unique_identifier_hash': unique_identifier_hash,
+                    })
+                    continue
+
+                # Document doesn't exist by unique_identifier_hash
+                # Check if a document with the same content_hash exists (from another connector)
+                with session.no_autoflush:
+                    duplicate_by_content = await check_duplicate_document_by_hash(
+                        session, content_hash
+                    )
+
+                if duplicate_by_content:
+                    logger.info(
+                        f"Repository {repo_full_name} already indexed by another connector "
+                        f"(existing document ID: {duplicate_by_content.id}, "
+                        f"type: {duplicate_by_content.document_type}). Skipping."
+                    )
+                    documents_skipped += 1
+                    continue
+
+                # Create new document with PENDING status (visible in UI immediately)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=repo_full_name,
+                    document_type=DocumentType.GITHUB_CONNECTOR,
+                    document_metadata={
+                        "repository_full_name": repo_full_name,
+                        "url": f"https://github.com/{repo_full_name}",
+                        "branch": digest.branch,
+                        "ingestion_method": "gitingest",
+                        "connector_id": connector_id,
+                    },
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
+                    updated_at=get_current_timestamp(),
+                    created_by_id=user_id,
+                    connector_id=connector_id,
+                )
+                session.add(document)
+                new_documents_created = True
+
+                repos_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'digest': digest,
+                    'content_hash': content_hash,
+                    'repo_full_name': repo_full_name,
+                    'unique_identifier_hash': unique_identifier_hash,
+                })
+
             except Exception as repo_err:
                 logger.error(
-                    f"Failed to process repository {repo_full_name}: {repo_err}"
+                    f"Error in Phase 1 for repository {repo_full_name}: {repo_err}",
+                    exc_info=True,
                 )
+                errors.append(f"Phase 1 error for {repo_full_name}: {repo_err}")
+                documents_failed += 1
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(repos_to_process)} documents")
+
+        for item in repos_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            digest = item['digest']
+            repo_full_name = item['repo_full_name']
+
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                document_metadata_for_summary = {
+                    "repository": repo_full_name,
+                    "document_type": "GitHub Repository",
+                    "connector_type": "GitHub",
+                    "ingestion_method": "gitingest",
+                    "file_tree": digest.tree[:2000] if len(digest.tree) > 2000 else digest.tree,
+                    "estimated_tokens": digest.estimated_tokens,
+                }
+
+                if user_llm:
+                    # Prepare content for summarization
+                    summary_content = digest.full_digest
+                    if len(summary_content) > MAX_DIGEST_CHARS:
+                        summary_content = (
+                            f"# Repository: {repo_full_name}\n\n"
+                            f"## File Structure\n\n{digest.tree}\n\n"
+                            f"## File Contents (truncated)\n\n{digest.content[: MAX_DIGEST_CHARS - len(digest.tree) - 200]}..."
+                        )
+
+                    summary_text, summary_embedding = await generate_document_summary(
+                        summary_content, user_llm, document_metadata_for_summary
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    summary_text = (
+                        f"# GitHub Repository: {repo_full_name}\n\n"
+                        f"## Summary\n{digest.summary}\n\n"
+                        f"## File Structure\n{digest.tree[:3000]}"
+                    )
+                    summary_embedding = config.embedding_model_instance.embed(summary_text)
+
+                # Chunk the full digest content for granular search
+                try:
+                    chunks_data = await create_document_chunks(digest.content)
+                except Exception as chunk_err:
+                    logger.error(f"Failed to chunk repository {repo_full_name}: {chunk_err}")
+                    chunks_data = await _simple_chunk_content(digest.content)
+
+                # Update document to READY with actual content
+                doc_metadata = {
+                    "repository_full_name": repo_full_name,
+                    "url": f"https://github.com/{repo_full_name}",
+                    "branch": digest.branch,
+                    "ingestion_method": "gitingest",
+                    "file_tree": digest.tree,
+                    "gitingest_summary": digest.summary,
+                    "estimated_tokens": digest.estimated_tokens,
+                    "connector_id": connector_id,
+                    "indexed_at": datetime.now(UTC).isoformat(),
+                }
+
+                document.title = repo_full_name
+                document.content = summary_text
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = doc_metadata
+                safe_set_chunks(document, chunks_data)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_processed += 1
+                documents_indexed += 1
+
+                logger.info(
+                    f"Created document for repository {repo_full_name} "
+                    f"with {len(chunks_data)} chunks"
+                )
+
+                # Batch commit every 5 documents (repositories are large)
+                if documents_indexed % 5 == 0:
+                    logger.info(
+                        f"Committing batch: {documents_indexed} GitHub repos processed so far"
+                    )
+                    await session.commit()
+
+            except Exception as repo_err:
+                logger.error(
+                    f"Error processing repository {repo_full_name}: {repo_err}",
+                    exc_info=True,
+                )
+                # Mark document as failed with reason (shown in UI once a later commit persists it)
+                try:
+                    document.status = DocumentStatus.failed(str(repo_err))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
                 errors.append(f"Failed processing {repo_full_name}: {repo_err}")
+                documents_failed += 1
+                continue
+
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit
-        await session.commit()
+        logger.info(f"Final commit: Total {documents_processed} GitHub repositories processed")
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all GitHub document changes to database"
+            )
+        except Exception as e:
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
+
         logger.info(
             f"Finished GitHub indexing for connector {connector_id}. "
             f"Created {documents_processed} documents."
@@ -247,6 +472,8 @@ async def index_github_repos(
             f"Successfully completed GitHub indexing for connector {connector_id}",
             {
                 "documents_processed": documents_processed,
+                "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                 "errors_count": len(errors),
                 "repo_count": len(repo_full_names_to_index),
                 "method": "gitingest",
@@ -286,163 +513,6 @@ async def index_github_repos(
     return documents_processed, error_message
 
 
-async def _process_repository_digest(
-    session: AsyncSession,
-    digest: RepositoryDigest,
-    search_space_id: int,
-    user_id: str,
-    task_logger: TaskLoggingService,
-    log_entry,
-    connector_id: int,
-) -> int:
-    """
-    Process a repository digest and create documents.
-
-    For each repository, we create:
-    1. One main document with the repository summary
-    2. Chunks from the full digest content for granular search
-
-    Args:
-        session: Database session
-        digest: The repository digest from gitingest
-        search_space_id: ID of the search space
-        user_id: ID of the user
-        task_logger: Task logging service
-        log_entry: Current log entry
-
-    Returns:
-        Number of documents created
-    """
-    repo_full_name = digest.repo_full_name
-    documents_created = 0
-
-    # Generate unique identifier based on repo name and content hash
-    # This allows updates when repo content changes
-    full_content = digest.full_digest
-    content_hash = generate_content_hash(full_content, search_space_id)
-
-    # Use repo name as the unique identifier (one document per repo)
-    unique_identifier_hash = generate_unique_identifier_hash(
-        DocumentType.GITHUB_CONNECTOR, repo_full_name, search_space_id
-    )
-
-    # Check if document with this unique identifier already exists
-    existing_document = await check_document_by_unique_identifier(
-        session, unique_identifier_hash
-    )
-
-    if existing_document:
-        # Document exists - check if content has changed
-        if existing_document.content_hash == content_hash:
-            logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
-            return 0
-        else:
-            logger.info(
-                f"Content changed for repository {repo_full_name}. Updating document."
-            )
-            # Delete existing document to replace with new one
-            await session.delete(existing_document)
-            await session.flush()
-    else:
-        # Document doesn't exist by unique_identifier_hash
-        # Check if a document with the same content_hash exists (from another connector)
-        with session.no_autoflush:
-            duplicate_by_content = await check_duplicate_document_by_hash(
-                session, content_hash
-            )
-
-        if duplicate_by_content:
-            logger.info(
-                f"Repository {repo_full_name} already indexed by another connector "
-                f"(existing document ID: {duplicate_by_content.id}, "
-                f"type: {duplicate_by_content.document_type}). Skipping."
-            )
-            return 0
-
-    # Generate summary using LLM (ONE call per repository!)
-    user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-
-    document_metadata = {
-        "repository": repo_full_name,
-        "document_type": "GitHub Repository",
-        "connector_type": "GitHub",
-        "ingestion_method": "gitingest",
-        "file_tree": digest.tree[:2000] if len(digest.tree) > 2000 else digest.tree,
-        "estimated_tokens": digest.estimated_tokens,
-    }
-
-    if user_llm:
-        # Prepare content for summarization
-        # Include tree structure and truncated content if too large
-        summary_content = digest.full_digest
-        if len(summary_content) > MAX_DIGEST_CHARS:
-            # Truncate but keep the tree and beginning of content
-            summary_content = (
-                f"# Repository: {repo_full_name}\n\n"
-                f"## File Structure\n\n{digest.tree}\n\n"
-                f"## File Contents (truncated)\n\n{digest.content[: MAX_DIGEST_CHARS - len(digest.tree) - 200]}..."
-            )
-
-        summary_text, summary_embedding = await generate_document_summary(
-            summary_content, user_llm, document_metadata
-        )
-    else:
-        # Fallback to simple summary if no LLM configured
-        summary_text = (
-            f"# GitHub Repository: {repo_full_name}\n\n"
-            f"## Summary\n{digest.summary}\n\n"
-            f"## File Structure\n{digest.tree[:3000]}"
-        )
-        summary_embedding = config.embedding_model_instance.embed(summary_text)
-
-    # Chunk the full digest content for granular search
-    try:
-        # Use the content (not the summary) for chunking
-        # This preserves file-level granularity in search
-        chunks_data = await create_document_chunks(digest.content)
-    except Exception as chunk_err:
-        logger.error(f"Failed to chunk repository {repo_full_name}: {chunk_err}")
-        # Fall back to a simpler chunking approach
-        chunks_data = await _simple_chunk_content(digest.content)
-
-    # Create the document
-    doc_metadata = {
-        "repository_full_name": repo_full_name,
-        "url": f"https://github.com/{repo_full_name}",
-        "branch": digest.branch,
-        "ingestion_method": "gitingest",
-        "file_tree": digest.tree,
-        "gitingest_summary": digest.summary,
-        "estimated_tokens": digest.estimated_tokens,
-        "indexed_at": datetime.now(UTC).isoformat(),
-    }
-
-    document = Document(
-        title=repo_full_name,
-        document_type=DocumentType.GITHUB_CONNECTOR,
-        document_metadata=doc_metadata,
-        content=summary_text,
-        content_hash=content_hash,
-        unique_identifier_hash=unique_identifier_hash,
-        embedding=summary_embedding,
-        search_space_id=search_space_id,
-        chunks=chunks_data,
-        updated_at=get_current_timestamp(),
-        created_by_id=user_id,
-        connector_id=connector_id,
-    )
-
-    session.add(document)
-    documents_created += 1
-
-    logger.info(
-        f"Created document for repository {repo_full_name} "
-        f"with {len(chunks_data)} chunks"
-    )
-
-    return documents_created
-
-
 async def _simple_chunk_content(content: str, chunk_size: int = 4000) -> list:
     """
     Simple fallback chunking when the regular chunker fails.

From 0f61a249c0f322d1ebf4e4bcf1c84272bbf8a403 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 04:31:55 +0530
Subject: [PATCH 26/36] feat: implement two-phase document indexing for
 BookStack, Elasticsearch, and Luma connectors with real-time status updates

---
 .../connector_indexers/bookstack_indexer.py   | 307 +++++++++------
 .../elasticsearch_indexer.py                  | 267 +++++++++----
 .../tasks/connector_indexers/luma_indexer.py  | 370 ++++++++++--------
 3 files changed, 580 insertions(+), 364 deletions(-)

diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
index f1338564e..fbf90b345 100644
--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@@ -1,5 +1,9 @@
 """
 BookStack connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Collect all pages and create pending documents (visible in UI immediately)
+- Phase 2: Process each page: pending → processing → ready/failed
 """
 
 import time
@@ -11,7 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.bookstack_connector import BookStackConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -28,6 +32,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -184,22 +189,22 @@ async def index_bookstack_pages(
             logger.error(f"Error fetching BookStack pages: {e!s}", exc_info=True)
             return 0, f"Error fetching BookStack pages: {e!s}"
 
-        # Process and index each page
+        # =======================================================================
+        # PHASE 1: Analyze all pages, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
         documents_indexed = 0
         skipped_pages = []
         documents_skipped = 0
+        documents_failed = 0
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
+        pages_to_process = []  # List of dicts with document and page data
+        new_documents_created = False
+
         for page in pages:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
             try:
                 page_id = page.get("id")
                 page_name = page.get("name", "")
@@ -218,7 +223,7 @@ async def index_bookstack_pages(
 
                 # Fetch full page content (Markdown preferred)
                 try:
-                    page_detail, page_content = bookstack_client.get_page_with_content(
+                    _, page_content = bookstack_client.get_page_with_content(
                         page_id, use_markdown=True
                     )
                 except Exception as e:
@@ -252,82 +257,34 @@ async def index_bookstack_pages(
                 # Build page URL
                 page_url = f"{bookstack_base_url}/books/{book_slug}/page/{page_slug}"
 
-                # Build document metadata
-                doc_metadata = {
-                    "page_id": page_id,
-                    "page_name": page_name,
-                    "page_slug": page_slug,
-                    "book_id": book_id,
-                    "book_slug": book_slug,
-                    "chapter_id": chapter_id,
-                    "base_url": bookstack_base_url,
-                    "page_url": page_url,
-                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                }
-
                 if existing_document:
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
                         logger.info(
                             f"Document for BookStack page {page_name} unchanged. Skipping."
                         )
                         documents_skipped += 1
                         continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for BookStack page {page_name}. Updating document."
-                        )
 
-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            summary_metadata = {
-                                "page_name": page_name,
-                                "page_id": page_id,
-                                "book_id": book_id,
-                                "document_type": "BookStack Page",
-                                "connector_type": "BookStack",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                full_content, user_llm, summary_metadata
-                            )
-                        else:
-                            summary_content = (
-                                f"BookStack Page: {page_name}\n\nBook ID: {book_id}\n\n"
-                            )
-                            if page_content:
-                                content_preview = page_content[:1000]
-                                if len(page_content) > 1000:
-                                    content_preview += "..."
-                                summary_content += (
-                                    f"Content Preview: {content_preview}\n\n"
-                                )
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(full_content)
-
-                        # Update existing document
-                        existing_document.title = page_name
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = doc_metadata
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(f"Successfully updated BookStack page {page_name}")
-                        continue
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    pages_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'page_id': page_id,
+                        'page_name': page_name,
+                        'page_slug': page_slug,
+                        'book_id': book_id,
+                        'book_slug': book_slug,
+                        'chapter_id': chapter_id,
+                        'page_url': page_url,
+                        'page_content': page_content,
+                        'full_content': full_content,
+                        'content_hash': content_hash,
+                    })
+                    continue
 
                 # Document doesn't exist by unique_identifier_hash
                 # Check if a document with the same content_hash exists (from another connector)
@@ -345,17 +302,104 @@ async def index_bookstack_pages(
                     documents_skipped += 1
                     continue
 
-                # Document doesn't exist - create new one
-                # Generate summary with metadata
+                # Create new document with PENDING status (visible in UI immediately)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=page_name,
+                    document_type=DocumentType.BOOKSTACK_CONNECTOR,
+                    document_metadata={
+                        "page_id": page_id,
+                        "page_name": page_name,
+                        "page_slug": page_slug,
+                        "book_id": book_id,
+                        "book_slug": book_slug,
+                        "chapter_id": chapter_id,
+                        "base_url": bookstack_base_url,
+                        "page_url": page_url,
+                        "connector_id": connector_id,
+                    },
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
+                    updated_at=get_current_timestamp(),
+                    created_by_id=user_id,
+                    connector_id=connector_id,
+                )
+                session.add(document)
+                new_documents_created = True
+
+                pages_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'page_id': page_id,
+                    'page_name': page_name,
+                    'page_slug': page_slug,
+                    'book_id': book_id,
+                    'book_slug': book_slug,
+                    'chapter_id': chapter_id,
+                    'page_url': page_url,
+                    'page_content': page_content,
+                    'full_content': full_content,
+                    'content_hash': content_hash,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(pages_to_process)} documents")
+
+        for item in pages_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
                 user_llm = await get_user_long_context_llm(
                     session, user_id, search_space_id
                 )
 
+                # Build document metadata
+                doc_metadata = {
+                    "page_id": item['page_id'],
+                    "page_name": item['page_name'],
+                    "page_slug": item['page_slug'],
+                    "book_id": item['book_id'],
+                    "book_slug": item['book_slug'],
+                    "chapter_id": item['chapter_id'],
+                    "base_url": bookstack_base_url,
+                    "page_url": item['page_url'],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+
                 if user_llm:
                     summary_metadata = {
-                        "page_name": page_name,
-                        "page_id": page_id,
-                        "book_id": book_id,
+                        "page_name": item['page_name'],
+                        "page_id": item['page_id'],
+                        "book_id": item['book_id'],
                         "document_type": "BookStack Page",
                         "connector_type": "BookStack",
                     }
@@ -363,17 +407,17 @@ async def index_bookstack_pages(
                         summary_content,
                         summary_embedding,
                     ) = await generate_document_summary(
-                        full_content, user_llm, summary_metadata
+                        item['full_content'], user_llm, summary_metadata
                     )
                 else:
                     # Fallback to simple summary if no LLM configured
                     summary_content = (
-                        f"BookStack Page: {page_name}\n\nBook ID: {book_id}\n\n"
+                        f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n"
                     )
-                    if page_content:
+                    if item['page_content']:
                         # Take first 1000 characters of content for summary
-                        content_preview = page_content[:1000]
-                        if len(page_content) > 1000:
+                        content_preview = item['page_content'][:1000]
+                        if len(item['page_content']) > 1000:
                             content_preview += "..."
                         summary_content += f"Content Preview: {content_preview}\n\n"
                     summary_embedding = config.embedding_model_instance.embed(
@@ -381,30 +425,21 @@ async def index_bookstack_pages(
                     )
 
                 # Process chunks - using the full page content
-                chunks = await create_document_chunks(full_content)
+                chunks = await create_document_chunks(item['full_content'])
 
-                # Create and store new document
-                logger.info(f"Creating new document for page {page_name}")
-                document = Document(
-                    search_space_id=search_space_id,
-                    title=page_name,
-                    document_type=DocumentType.BOOKSTACK_CONNECTOR,
-                    document_metadata=doc_metadata,
-                    content=summary_content,
-                    content_hash=content_hash,
-                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
-                    updated_at=get_current_timestamp(),
-                    created_by_id=user_id,
-                    connector_id=connector_id,
-                )
+                # Update document to READY with actual content
+                document.title = item['page_name']
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = doc_metadata
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
 
-                session.add(document)
                 documents_indexed += 1
-                logger.info(f"Successfully indexed new page {page_name}")
 
-                # Batch commit every 10 documents
+                # Batch commit every 10 documents (for ready status updates)
                 if documents_indexed % 10 == 0:
                     logger.info(
                         f"Committing batch: {documents_indexed} BookStack pages processed so far"
@@ -413,46 +448,72 @@ async def index_bookstack_pages(
 
             except Exception as e:
                 logger.error(
-                    f"Error processing page {page.get('name', 'Unknown')}: {e!s}",
+                    f"Error processing page {item.get('page_name', 'Unknown')}: {e!s}",
                     exc_info=True,
                 )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
                 skipped_pages.append(
-                    f"{page.get('name', 'Unknown')} (processing error)"
+                    f"{item.get('page_name', 'Unknown')} (processing error)"
                 )
-                documents_skipped += 1
-                continue  # Skip this page and continue with others
+                documents_failed += 1
+                continue
 
-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if update_last_indexed:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(
             f"Final commit: Total {documents_indexed} BookStack pages processed"
         )
-        await session.commit()
-        logger.info("Successfully committed all BookStack document changes to database")
+        try:
+            await session.commit()
+            logger.info("Successfully committed all BookStack document changes to database")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same page was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         # Log success
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed BookStack indexing for connector {connector_id}",
             {
-                "pages_processed": total_processed,
+                "pages_processed": documents_indexed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                 "skipped_pages_count": len(skipped_pages),
             },
         )
 
         logger.info(
-            f"BookStack indexing completed: {documents_indexed} new pages, {documents_skipped} skipped"
+            f"BookStack indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed"
         )
-        return (
-            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
+        return documents_indexed, warning_message
 
     except SQLAlchemyError as db_error:
         await session.rollback()
diff --git a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
index fb6487474..97cd31a09 100644
--- a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
@@ -1,5 +1,9 @@
 """
 Elasticsearch indexer for SurfSense
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Collect all documents and create pending documents (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import json
@@ -13,7 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select
 
 from app.connectors.elasticsearch_connector import ElasticsearchConnector
-from app.db import Document, DocumentType, SearchSourceConnector
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnector
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
     create_document_chunks,
@@ -25,6 +29,7 @@ from .base import (
     check_document_by_unique_identifier,
     check_duplicate_document_by_hash,
     get_current_timestamp,
+    safe_set_chunks,
 )
 
 # Type hint for heartbeat callback
@@ -164,6 +169,8 @@ async def index_elasticsearch_documents(
         )
 
         documents_processed = 0
+        documents_skipped = 0
+        documents_failed = 0
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
@@ -178,23 +185,22 @@ async def index_elasticsearch_documents(
                     "max_documents": max_documents,
                 },
             )
-            # Use scroll search for large result sets
+
+            # =======================================================================
+            # PHASE 1: Collect all documents from Elasticsearch and create pending documents
+            # This makes ALL documents visible in the UI immediately with pending status
+            # =======================================================================
+            docs_to_process = []  # List of dicts with document and ES data
+            new_documents_created = False
+            hits_collected = 0
+
             async for hit in es_connector.scroll_search(
                 index=index_name,
                 query=query,
                 size=min(max_documents, 100),  # Scroll in batches
                 fields=config.get("ELASTICSEARCH_FIELDS"),
             ):
-                # Check if it's time for a heartbeat update
-                if (
-                    on_heartbeat_callback
-                    and (time.time() - last_heartbeat_time)
-                    >= HEARTBEAT_INTERVAL_SECONDS
-                ):
-                    await on_heartbeat_callback(documents_processed)
-                    last_heartbeat_time = time.time()
-
-                if documents_processed >= max_documents:
+                if hits_collected >= max_documents:
                     break
 
                 try:
@@ -220,26 +226,12 @@ async def index_elasticsearch_documents(
 
                     if not content.strip():
                         logger.warning(f"Skipping document {doc_id} - no content found")
+                        documents_skipped += 1
                         continue
 
                     # Create content hash
                     content_hash = generate_content_hash(content, search_space_id)
 
-                    # Build metadata
-                    metadata = {
-                        "elasticsearch_id": doc_id,
-                        "elasticsearch_index": hit.get("_index", index_name),
-                        "elasticsearch_score": hit.get("_score"),
-                        "indexed_at": datetime.now().isoformat(),
-                        "source": "ELASTICSEARCH_CONNECTOR",
-                    }
-
-                    # Add any additional metadata fields specified in config
-                    if "ELASTICSEARCH_METADATA_FIELDS" in config:
-                        for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
-                            if field in source:
-                                metadata[f"es_{field}"] = source[field]
-
                     # Build source-unique identifier and hash (prefer source id dedupe)
                     source_identifier = f"{hit.get('_index', index_name)}:{doc_id}"
                     unique_identifier_hash = generate_unique_identifier_hash(
@@ -258,98 +250,209 @@ async def index_elasticsearch_documents(
                         )
 
                     if existing_doc:
-                        # If content is unchanged, skip. Otherwise update the existing document.
+                        # If content is unchanged, skip. Otherwise queue for update.
                         if existing_doc.content_hash == content_hash:
+                            # Ensure status is ready (might have been stuck in processing/pending)
+                            if not DocumentStatus.is_state(existing_doc.status, DocumentStatus.READY):
+                                existing_doc.status = DocumentStatus.ready()
                             logger.info(
                                 f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})"
                             )
-                            continue
-                        else:
-                            logger.info(
-                                f"Updating existing document {existing_doc.id} for ES doc {doc_id}"
-                            )
-                            existing_doc.title = title
-                            existing_doc.content = content
-                            existing_doc.content_hash = content_hash
-                            existing_doc.document_metadata = metadata
-                            existing_doc.unique_identifier_hash = unique_identifier_hash
-                            chunks = await create_document_chunks(content)
-                            existing_doc.chunks = chunks
-                            existing_doc.updated_at = get_current_timestamp()
-                            await session.flush()
-                            documents_processed += 1
-                            if documents_processed % 10 == 0:
-                                await session.commit()
+                            documents_skipped += 1
                             continue
 
-                    # Create document
+                        # Queue existing document for update (will be set to processing in Phase 2)
+                        docs_to_process.append({
+                            'document': existing_doc,
+                            'is_new': False,
+                            'doc_id': doc_id,
+                            'title': title,
+                            'content': content,
+                            'content_hash': content_hash,
+                            'unique_identifier_hash': unique_identifier_hash,
+                            'hit': hit,
+                            'source': source,
+                        })
+                        hits_collected += 1
+                        continue
+
+                    # Build metadata for new document
+                    metadata = {
+                        "elasticsearch_id": doc_id,
+                        "elasticsearch_index": hit.get("_index", index_name),
+                        "elasticsearch_score": hit.get("_score"),
+                        "source": "ELASTICSEARCH_CONNECTOR",
+                        "connector_id": connector_id,
+                    }
+
+                    # Add any additional metadata fields specified in config
+                    if "ELASTICSEARCH_METADATA_FIELDS" in config:
+                        for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
+                            if field in source:
+                                metadata[f"es_{field}"] = source[field]
+
+                    # Create new document with PENDING status (visible in UI immediately)
                     document = Document(
                         title=title,
-                        content=content,
-                        content_hash=content_hash,
+                        content="Pending...",  # Placeholder until processed
+                        content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                         unique_identifier_hash=unique_identifier_hash,
                         document_type=DocumentType.ELASTICSEARCH_CONNECTOR,
                         document_metadata=metadata,
                         search_space_id=search_space_id,
+                        embedding=None,
+                        chunks=[],  # Empty at creation - safe for async
+                        status=DocumentStatus.pending(),  # Pending until processing starts
                         updated_at=get_current_timestamp(),
                         created_by_id=user_id,
                         connector_id=connector_id,
                     )
-
-                    # Create chunks and attach to document (persist via relationship)
-                    chunks = await create_document_chunks(content)
-                    document.chunks = chunks
                     session.add(document)
-                    await session.flush()
+                    new_documents_created = True
+
+                    docs_to_process.append({
+                        'document': document,
+                        'is_new': True,
+                        'doc_id': doc_id,
+                        'title': title,
+                        'content': content,
+                        'content_hash': content_hash,
+                        'unique_identifier_hash': unique_identifier_hash,
+                        'hit': hit,
+                        'source': source,
+                    })
+                    hits_collected += 1
+
+                except Exception as e:
+                    logger.error(f"Error in Phase 1 for ES doc: {e!s}", exc_info=True)
+                    documents_failed += 1
+                    continue
+
+            # Commit all pending documents - they all appear in UI now
+            if new_documents_created:
+                logger.info(f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents")
+                await session.commit()
+
+            # =======================================================================
+            # PHASE 2: Process each document one by one
+            # Each document transitions: pending → processing → ready/failed
+            # =======================================================================
+            logger.info(f"Phase 2: Processing {len(docs_to_process)} documents")
+
+            for item in docs_to_process:
+                # Send heartbeat periodically
+                if on_heartbeat_callback:
+                    current_time = time.time()
+                    if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                        await on_heartbeat_callback(documents_processed)
+                        last_heartbeat_time = current_time
+
+                document = item['document']
+                try:
+                    # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                    document.status = DocumentStatus.processing()
+                    await session.commit()
+
+                    # Build metadata
+                    metadata = {
+                        "elasticsearch_id": item['doc_id'],
+                        "elasticsearch_index": item['hit'].get("_index", index_name),
+                        "elasticsearch_score": item['hit'].get("_score"),
+                        "indexed_at": datetime.now().isoformat(),
+                        "source": "ELASTICSEARCH_CONNECTOR",
+                        "connector_id": connector_id,
+                    }
+
+                    # Add any additional metadata fields specified in config
+                    if "ELASTICSEARCH_METADATA_FIELDS" in config:
+                        for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
+                            if field in item['source']:
+                                metadata[f"es_{field}"] = item['source'][field]
+
+                    # Create chunks
+                    chunks = await create_document_chunks(item['content'])
+
+                    # Update document to READY with actual content
+                    document.title = item['title']
+                    document.content = item['content']
+                    document.content_hash = item['content_hash']
+                    document.unique_identifier_hash = item['unique_identifier_hash']
+                    document.document_metadata = metadata
+                    safe_set_chunks(document, chunks)
+                    document.updated_at = get_current_timestamp()
+                    document.status = DocumentStatus.ready()
 
                     documents_processed += 1
 
+                    # Batch commit every 10 documents (for ready status updates)
                     if documents_processed % 10 == 0:
                         logger.info(
-                            f"Processed {documents_processed} Elasticsearch documents"
+                            f"Committing batch: {documents_processed} Elasticsearch documents processed so far"
                         )
                         await session.commit()
 
                 except Exception as e:
-                    msg = f"Error processing Elasticsearch document {hit.get('_id', 'unknown')}: {e}"
+                    msg = f"Error processing Elasticsearch document {item.get('doc_id', 'unknown')}: {e}"
                     logger.error(msg)
-                    await task_logger.log_task_failure(
-                        log_entry,
-                        "Document processing error",
-                        msg,
-                        {
-                            "document_id": hit.get("_id", "unknown"),
-                            "error_type": type(e).__name__,
-                        },
-                    )
+                    # Mark document as failed with reason (visible in UI)
+                    try:
+                        document.status = DocumentStatus.failed(str(e))
+                        document.updated_at = get_current_timestamp()
+                    except Exception as status_error:
+                        logger.error(f"Failed to update document status to failed: {status_error}")
+                    documents_failed += 1
                     continue
 
-            # Final commit
-            await session.commit()
+            # CRITICAL: When update_last_indexed is set, update the timestamp even if 0 documents
+            # were indexed so Electric SQL syncs and the UI shows "Last indexed" instead of "Never indexed"
+            if update_last_indexed:
+                connector.last_indexed_at = (
+                    datetime.now(UTC).isoformat().replace("+00:00", "Z")
+                )
+
+            # Final commit for any remaining documents not yet committed in batches
+            logger.info(f"Final commit: Total {documents_processed} Elasticsearch documents processed")
+            try:
+                await session.commit()
+                logger.info("Successfully committed all Elasticsearch document changes to database")
+            except Exception as e:
+                # Handle any remaining integrity errors gracefully (race conditions, etc.)
+                if (
+                    "duplicate key value violates unique constraint" in str(e).lower()
+                    or "uniqueviolationerror" in str(e).lower()
+                ):
+                    logger.warning(
+                        f"Duplicate content_hash detected during final commit. "
+                        f"This may occur if the same document was indexed by multiple connectors. "
+                        f"Rolling back and continuing. Error: {e!s}"
+                    )
+                    await session.rollback()
+                    # Don't fail the entire task - some documents may have been successfully indexed
+                else:
+                    raise
+
+            # Build warning message if there were issues
+            warning_parts = []
+            if documents_failed > 0:
+                warning_parts.append(f"{documents_failed} failed")
+            warning_message = ", ".join(warning_parts) if warning_parts else None
 
             await task_logger.log_task_success(
                 log_entry,
                 f"Successfully indexed {documents_processed} documents from Elasticsearch",
-                {"documents_indexed": documents_processed, "index": index_name},
+                {
+                    "documents_indexed": documents_processed,
+                    "documents_skipped": documents_skipped,
+                    "documents_failed": documents_failed,
+                    "index": index_name,
+                },
             )
             logger.info(
-                f"Successfully indexed {documents_processed} documents from Elasticsearch"
+                f"Elasticsearch indexing completed: {documents_processed} ready, "
+                f"{documents_skipped} skipped, {documents_failed} failed"
             )
 
-            # Update last indexed timestamp if requested
-            if update_last_indexed and documents_processed > 0:
-                # connector.last_indexed_at = datetime.now()
-                connector.last_indexed_at = (
-                    datetime.now(UTC).isoformat().replace("+00:00", "Z")
-                )
-                await session.commit()
-                await task_logger.log_task_progress(
-                    log_entry,
-                    "Updated connector.last_indexed_at",
-                    {"last_indexed_at": connector.last_indexed_at},
-                )
-
-            return documents_processed, None
+            return documents_processed, warning_message
 
         finally:
             # Clean up Elasticsearch connection
diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
index f4527843c..80d4ef3cf 100644
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@@ -1,5 +1,9 @@
 """
 Luma connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Collect all events and create pending documents (visible in UI immediately)
+- Phase 2: Process each event: pending → processing → ready/failed
 """
 
 import time
@@ -11,7 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.luma_connector import LumaConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -27,6 +31,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -227,21 +232,22 @@ async def index_luma_events(
             logger.error(f"Error fetching Luma events: {e!s}", exc_info=True)
             return 0, f"Error fetching Luma events: {e!s}"
 
+        # =======================================================================
+        # PHASE 1: Analyze all events, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
         documents_indexed = 0
         documents_skipped = 0
+        documents_failed = 0
         skipped_events = []
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
+        events_to_process = []  # List of dicts with document and event data
+        new_documents_created = False
+
         for event in events:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
             try:
                 # Luma event structure fields - events have nested 'event' field
                 event_data = event.get("event", {})
@@ -298,91 +304,34 @@ async def index_luma_events(
                 if existing_document:
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
                         logger.info(
                             f"Document for Luma event {event_name} unchanged. Skipping."
                         )
                         documents_skipped += 1
                         continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for Luma event {event_name}. Updating document."
-                        )
 
-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "event_id": event_id,
-                                "event_name": event_name,
-                                "event_url": event_url,
-                                "start_at": start_at,
-                                "end_at": end_at,
-                                "timezone": timezone,
-                                "location": location or "No location",
-                                "city": city,
-                                "hosts": host_names,
-                                "document_type": "Luma Event",
-                                "connector_type": "Luma",
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                event_markdown, user_llm, document_metadata
-                            )
-                        else:
-                            summary_content = f"Luma Event: {event_name}\n\n"
-                            if event_url:
-                                summary_content += f"URL: {event_url}\n"
-                            summary_content += f"Start: {start_at}\n"
-                            summary_content += f"End: {end_at}\n"
-                            if timezone:
-                                summary_content += f"Timezone: {timezone}\n"
-                            if location:
-                                summary_content += f"Location: {location}\n"
-                            if city:
-                                summary_content += f"City: {city}\n"
-                            if host_names:
-                                summary_content += f"Hosts: {host_names}\n"
-                            if description:
-                                desc_preview = description[:1000]
-                                if len(description) > 1000:
-                                    desc_preview += "..."
-                                summary_content += f"Description: {desc_preview}\n"
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(event_markdown)
-
-                        # Update existing document
-                        existing_document.title = event_name
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
-                            "event_id": event_id,
-                            "event_name": event_name,
-                            "event_url": event_url,
-                            "start_at": start_at,
-                            "end_at": end_at,
-                            "timezone": timezone,
-                            "location": location,
-                            "city": city,
-                            "hosts": host_names,
-                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_indexed += 1
-                        logger.info(f"Successfully updated Luma event {event_name}")
-                        continue
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    events_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'event_id': event_id,
+                        'event_name': event_name,
+                        'event_url': event_url,
+                        'event_markdown': event_markdown,
+                        'content_hash': content_hash,
+                        'start_at': start_at,
+                        'end_at': end_at,
+                        'timezone': timezone,
+                        'location': location,
+                        'city': city,
+                        'host_names': host_names,
+                        'description': description,
+                        'cover_url': cover_url,
+                    })
+                    continue
 
                 # Document doesn't exist by unique_identifier_hash
                 # Check if a document with the same content_hash exists (from another connector)
@@ -400,59 +349,7 @@ async def index_luma_events(
                     documents_skipped += 1
                     continue
 
-                # Document doesn't exist - create new one
-                # Generate summary with metadata
-                user_llm = await get_user_long_context_llm(
-                    session, user_id, search_space_id
-                )
-
-                if user_llm:
-                    document_metadata = {
-                        "event_id": event_id,
-                        "event_name": event_name,
-                        "event_url": event_url,
-                        "start_at": start_at,
-                        "end_at": end_at,
-                        "timezone": timezone,
-                        "location": location or "No location",
-                        "city": city,
-                        "hosts": host_names,
-                        "document_type": "Luma Event",
-                        "connector_type": "Luma",
-                    }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        event_markdown, user_llm, document_metadata
-                    )
-                else:
-                    # Fallback to simple summary if no LLM configured
-                    summary_content = f"Luma Event: {event_name}\n\n"
-                    if event_url:
-                        summary_content += f"URL: {event_url}\n"
-                    summary_content += f"Start: {start_at}\n"
-                    summary_content += f"End: {end_at}\n"
-                    if timezone:
-                        summary_content += f"Timezone: {timezone}\n"
-                    if location:
-                        summary_content += f"Location: {location}\n"
-                    if city:
-                        summary_content += f"City: {city}\n"
-                    if host_names:
-                        summary_content += f"Hosts: {host_names}\n"
-                    if description:
-                        desc_preview = description[:1000]
-                        if len(description) > 1000:
-                            desc_preview += "..."
-                        summary_content += f"Description: {desc_preview}\n"
-
-                    summary_embedding = config.embedding_model_instance.embed(
-                        summary_content
-                    )
-
-                chunks = await create_document_chunks(event_markdown)
-
+                # Create new document with PENDING status (visible in UI immediately)
                 document = Document(
                     search_space_id=search_space_id,
                     title=event_name,
@@ -468,23 +365,147 @@ async def index_luma_events(
                         "city": city,
                         "hosts": host_names,
                         "cover_url": cover_url,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "connector_id": connector_id,
                     },
-                    content=summary_content,
-                    content_hash=content_hash,
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                     unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
                     updated_at=get_current_timestamp(),
                     created_by_id=user_id,
                     connector_id=connector_id,
                 )
-
                 session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new event {event_name}")
+                new_documents_created = True
 
-                # Batch commit every 10 documents
+                events_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'event_id': event_id,
+                    'event_name': event_name,
+                    'event_url': event_url,
+                    'event_markdown': event_markdown,
+                    'content_hash': content_hash,
+                    'start_at': start_at,
+                    'end_at': end_at,
+                    'timezone': timezone,
+                    'location': location,
+                    'city': city,
+                    'host_names': host_names,
+                    'description': description,
+                    'cover_url': cover_url,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(events_to_process)} documents")
+
+        for item in events_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Heavy processing (LLM, embeddings, chunks)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
+
+                if user_llm:
+                    document_metadata_for_summary = {
+                        "event_id": item['event_id'],
+                        "event_name": item['event_name'],
+                        "event_url": item['event_url'],
+                        "start_at": item['start_at'],
+                        "end_at": item['end_at'],
+                        "timezone": item['timezone'],
+                        "location": item['location'] or "No location",
+                        "city": item['city'],
+                        "hosts": item['host_names'],
+                        "document_type": "Luma Event",
+                        "connector_type": "Luma",
+                    }
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item['event_markdown'], user_llm, document_metadata_for_summary
+                    )
+                else:
+                    # Fallback to simple summary if no LLM configured
+                    summary_content = f"Luma Event: {item['event_name']}\n\n"
+                    if item['event_url']:
+                        summary_content += f"URL: {item['event_url']}\n"
+                    summary_content += f"Start: {item['start_at']}\n"
+                    summary_content += f"End: {item['end_at']}\n"
+                    if item['timezone']:
+                        summary_content += f"Timezone: {item['timezone']}\n"
+                    if item['location']:
+                        summary_content += f"Location: {item['location']}\n"
+                    if item['city']:
+                        summary_content += f"City: {item['city']}\n"
+                    if item['host_names']:
+                        summary_content += f"Hosts: {item['host_names']}\n"
+                    if item['description']:
+                        desc_preview = item['description'][:1000]
+                        if len(item['description']) > 1000:
+                            desc_preview += "..."
+                        summary_content += f"Description: {desc_preview}\n"
+
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_content
+                    )
+
+                chunks = await create_document_chunks(item['event_markdown'])
+
+                # Update document to READY with actual content
+                document.title = item['event_name']
+                document.content = summary_content
+                document.content_hash = item['content_hash']
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    "event_id": item['event_id'],
+                    "event_name": item['event_name'],
+                    "event_url": item['event_url'],
+                    "start_at": item['start_at'],
+                    "end_at": item['end_at'],
+                    "timezone": item['timezone'],
+                    "location": item['location'],
+                    "city": item['city'],
+                    "hosts": item['host_names'],
+                    "cover_url": item['cover_url'],
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                documents_indexed += 1
+
+                # Batch commit every 10 documents (for ready status updates)
                 if documents_indexed % 10 == 0:
                     logger.info(
                         f"Committing batch: {documents_indexed} Luma events processed so far"
@@ -493,38 +514,69 @@ async def index_luma_events(
 
             except Exception as e:
                 logger.error(
-                    f"Error processing event {event.get('name', 'Unknown')}: {e!s}",
+                    f"Error processing event {item.get('event_name', 'Unknown')}: {e!s}",
                     exc_info=True,
                 )
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
                 skipped_events.append(
-                    f"{event.get('name', 'Unknown')} (processing error)"
+                    f"{item.get('event_name', 'Unknown')} (processing error)"
                 )
-                documents_skipped += 1
+                documents_failed += 1
                 continue
 
-        total_processed = documents_indexed
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        # This ensures the UI shows "Last indexed" instead of "Never indexed"
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(f"Final commit: Total {documents_indexed} Luma events processed")
-        await session.commit()
+        try:
+            await session.commit()
+            logger.info("Successfully committed all Luma document changes to database")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same event was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         await task_logger.log_task_success(
             log_entry,
             f"Successfully completed Luma indexing for connector {connector_id}",
             {
-                "events_processed": total_processed,
+                "events_processed": documents_indexed,
                 "documents_indexed": documents_indexed,
                 "documents_skipped": documents_skipped,
+                "documents_failed": documents_failed,
                 "skipped_events_count": len(skipped_events),
             },
         )
 
         logger.info(
-            f"Luma indexing completed: {documents_indexed} new events, {documents_skipped} skipped"
+            f"Luma indexing completed: {documents_indexed} ready, "
+            f"{documents_skipped} skipped, {documents_failed} failed"
         )
-        return total_processed, None
+        return documents_indexed, warning_message
 
     except SQLAlchemyError as db_error:
         await session.rollback()

From 629f6f9cf5e42b63ec50193b9c694da6056bab40 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 04:35:13 +0530
Subject: [PATCH 27/36] feat: implement two-phase document indexing for
 Obsidian and Circleback connectors with real-time status updates

---
 .../connector_indexers/obsidian_indexer.py    | 362 ++++++++++++------
 .../app/tasks/document_processors/base.py     |  28 ++
 .../circleback_processor.py                   | 196 ++++++----
 3 files changed, 394 insertions(+), 192 deletions(-)

diff --git a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
index cfc321df1..0e6934e2c 100644
--- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
@@ -3,6 +3,10 @@ Obsidian connector indexer.
 
 Indexes markdown notes from a local Obsidian vault.
 This connector is only available in self-hosted mode.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import os
@@ -17,7 +21,7 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -34,6 +38,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -307,25 +312,22 @@ async def index_obsidian_vault(
 
         logger.info(f"Processing {len(files)} files after date filtering")
 
-        # Get LLM for summarization
-        long_context_llm = await get_user_long_context_llm(
-            session, user_id, search_space_id
-        )
-
         indexed_count = 0
         skipped_count = 0
+        failed_count = 0
+        duplicate_content_count = 0
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
+        # =======================================================================
+        # PHASE 1: Analyze all files, create pending documents
+        # This makes ALL documents visible in the UI immediately with pending status
+        # =======================================================================
+        files_to_process = []  # List of dicts with document and file data
+        new_documents_created = False
+
         for file_info in files:
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(indexed_count)
-                last_heartbeat_time = time.time()
             try:
                 file_path = file_info["path"]
                 relative_path = file_info["relative_path"]
@@ -368,13 +370,143 @@ async def index_obsidian_vault(
                     search_space_id,
                 )
 
+                # Generate content hash
+                content_hash = generate_content_hash(content, search_space_id)
+
                 # Check for existing document
                 existing_document = await check_document_by_unique_identifier(
                     session, unique_identifier_hash
                 )
 
-                # Generate content hash
-                content_hash = generate_content_hash(content, search_space_id)
+                if existing_document:
+                    # Document exists - check if content has changed
+                    if existing_document.content_hash == content_hash:
+                        # Ensure status is ready (might have been stuck in processing/pending)
+                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            existing_document.status = DocumentStatus.ready()
+                        logger.debug(f"Note {title} unchanged, skipping")
+                        skipped_count += 1
+                        continue
+
+                    # Queue existing document for update (will be set to processing in Phase 2)
+                    files_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'file_info': file_info,
+                        'content': content,
+                        'body_content': body_content,
+                        'frontmatter': frontmatter,
+                        'wiki_links': wiki_links,
+                        'tags': tags,
+                        'title': title,
+                        'relative_path': relative_path,
+                        'content_hash': content_hash,
+                        'unique_identifier_hash': unique_identifier_hash,
+                    })
+                    continue
+
+                # Document doesn't exist by unique_identifier_hash
+                # Check if a document with the same content_hash exists (from another connector)
+                with session.no_autoflush:
+                    duplicate_by_content = await check_duplicate_document_by_hash(
+                        session, content_hash
+                    )
+
+                if duplicate_by_content:
+                    logger.info(
+                        f"Obsidian note {title} already indexed by another connector "
+                        f"(existing document ID: {duplicate_by_content.id}, "
+                        f"type: {duplicate_by_content.document_type}). Skipping."
+                    )
+                    duplicate_content_count += 1
+                    skipped_count += 1
+                    continue
+
+                # Create new document with PENDING status (visible in UI immediately)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=title,
+                    document_type=DocumentType.OBSIDIAN_CONNECTOR,
+                    document_metadata={
+                        "vault_name": vault_name,
+                        "file_path": relative_path,
+                        "connector_id": connector_id,
+                    },
+                    content="Pending...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # Pending until processing starts
+                    updated_at=get_current_timestamp(),
+                    created_by_id=user_id,
+                    connector_id=connector_id,
+                )
+                session.add(document)
+                new_documents_created = True
+
+                files_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'file_info': file_info,
+                    'content': content,
+                    'body_content': body_content,
+                    'frontmatter': frontmatter,
+                    'wiki_links': wiki_links,
+                    'tags': tags,
+                    'title': title,
+                    'relative_path': relative_path,
+                    'content_hash': content_hash,
+                    'unique_identifier_hash': unique_identifier_hash,
+                })
+
+            except Exception as e:
+                logger.exception(
+                    f"Error in Phase 1 for file {file_info.get('path', 'unknown')}: {e}"
+                )
+                failed_count += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each document one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(files_to_process)} documents")
+
+        # Get LLM for summarization
+        long_context_llm = await get_user_long_context_llm(
+            session, user_id, search_space_id
+        )
+
+        for item in files_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(indexed_count)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
+
+                # Extract data from item
+                title = item['title']
+                relative_path = item['relative_path']
+                content = item['content']
+                body_content = item['body_content']
+                frontmatter = item['frontmatter']
+                wiki_links = item['wiki_links']
+                tags = item['tags']
+                content_hash = item['content_hash']
+                file_info = item['file_info']
 
                 # Build metadata
                 document_metadata = {
@@ -404,134 +536,114 @@ async def index_obsidian_vault(
                 ]
                 document_string = build_document_metadata_string(metadata_sections)
 
-                if existing_document:
-                    # Check if content has changed
-                    if existing_document.content_hash == content_hash:
-                        logger.debug(f"Note {title} unchanged, skipping")
-                        skipped_count += 1
-                        continue
-
-                    # Update existing document
-                    logger.info(f"Updating note: {title}")
-
-                    # Generate new summary if content changed
-                    if long_context_llm:
-                        new_summary, _ = await generate_document_summary(
-                            document_string,
-                            long_context_llm,
-                            document_metadata,
-                        )
-                        # Store summary in metadata
-                        document_metadata["summary"] = new_summary
-
-                    # Add URL and connector_id to metadata
-                    document_metadata["url"] = (
-                        f"obsidian://{vault_name}/{relative_path}"
-                    )
-                    document_metadata["connector_id"] = connector_id
-
-                    existing_document.content = document_string
-                    existing_document.content_hash = content_hash
-                    existing_document.document_metadata = document_metadata
-                    existing_document.updated_at = get_current_timestamp()
-
-                    # Update embedding
-                    embedding = config.embedding_model_instance.embed(document_string)
-                    existing_document.embedding = embedding
-
-                    # Update chunks - delete old and create new
-                    existing_document.chunks.clear()
-                    new_chunks = await create_document_chunks(document_string)
-                    existing_document.chunks = new_chunks
-
-                    indexed_count += 1
-
-                else:
-                    # Document doesn't exist by unique_identifier_hash
-                    # Check if a document with the same content_hash exists (from another connector)
-                    with session.no_autoflush:
-                        duplicate_by_content = await check_duplicate_document_by_hash(
-                            session, content_hash
-                        )
-
-                    if duplicate_by_content:
-                        logger.info(
-                            f"Obsidian note {title} already indexed by another connector "
-                            f"(existing document ID: {duplicate_by_content.id}, "
-                            f"type: {duplicate_by_content.document_type}). Skipping."
-                        )
-                        skipped_count += 1
-                        continue
-
-                    # Create new document
-                    logger.info(f"Indexing new note: {title}")
-
-                    # Generate summary
-                    summary_content = ""
-                    if long_context_llm:
-                        summary_content, _ = await generate_document_summary(
-                            document_string,
-                            long_context_llm,
-                            document_metadata,
-                        )
-
-                    # Generate embedding
-                    embedding = config.embedding_model_instance.embed(document_string)
-
-                    # Add URL and summary to metadata
-                    document_metadata["url"] = (
-                        f"obsidian://{vault_name}/{relative_path}"
-                    )
-                    document_metadata["summary"] = summary_content
-                    document_metadata["connector_id"] = connector_id
-
-                    # Create chunks
-                    chunks = await create_document_chunks(document_string)
-
-                    # Create document
-                    new_document = Document(
-                        search_space_id=search_space_id,
-                        title=title,
-                        document_type=DocumentType.OBSIDIAN_CONNECTOR,
-                        content=document_string,
-                        content_hash=content_hash,
-                        unique_identifier_hash=unique_identifier_hash,
-                        document_metadata=document_metadata,
-                        embedding=embedding,
-                        chunks=chunks,
-                        updated_at=get_current_timestamp(),
-                        created_by_id=user_id,
-                        connector_id=connector_id,
+                # Generate summary
+                summary_content = ""
+                if long_context_llm:
+                    summary_content, _ = await generate_document_summary(
+                        document_string,
+                        long_context_llm,
+                        document_metadata,
                     )
 
-                    session.add(new_document)
+                # Generate embedding
+                embedding = config.embedding_model_instance.embed(document_string)
 
-                    indexed_count += 1
+                # Add URL and summary to metadata
+                document_metadata["url"] = f"obsidian://{vault_name}/{relative_path}"
+                document_metadata["summary"] = summary_content
+                document_metadata["connector_id"] = connector_id
+
+                # Create chunks
+                chunks = await create_document_chunks(document_string)
+
+                # Update document to READY with actual content
+                document.title = title
+                document.content = document_string
+                document.content_hash = content_hash
+                document.embedding = embedding
+                document.document_metadata = document_metadata
+                safe_set_chunks(document, chunks)
+                document.updated_at = get_current_timestamp()
+                document.status = DocumentStatus.ready()
+
+                indexed_count += 1
+
+                # Batch commit every 10 documents (for ready status updates)
+                if indexed_count % 10 == 0:
+                    logger.info(
+                        f"Committing batch: {indexed_count} Obsidian notes processed so far"
+                    )
+                    await session.commit()
 
             except Exception as e:
                 logger.exception(
-                    f"Error processing file {file_info.get('path', 'unknown')}: {e}"
+                    f"Error processing file {item.get('file_info', {}).get('path', 'unknown')}: {e}"
                 )
-                skipped_count += 1
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e))
+                    document.updated_at = get_current_timestamp()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                failed_count += 1
                 continue
 
-        # Update connector's last indexed timestamp
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
         await update_connector_last_indexed(session, connector, update_last_indexed)
 
-        # Commit all changes
-        await session.commit()
+        # Final commit for any remaining documents not yet committed in batches
+        logger.info(
+            f"Final commit: Total {indexed_count} Obsidian notes processed"
+        )
+        try:
+            await session.commit()
+            logger.info(
+                "Successfully committed all Obsidian document changes to database"
+            )
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully (race conditions, etc.)
+            if (
+                "duplicate key value violates unique constraint" in str(e).lower()
+                or "uniqueviolationerror" in str(e).lower()
+            ):
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"This may occur if the same note was indexed by multiple connectors. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+                # Don't fail the entire task - some documents may have been successfully indexed
+            else:
+                raise
+
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if failed_count > 0:
+            warning_parts.append(f"{failed_count} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
+
+        total_processed = indexed_count
 
         await task_logger.log_task_success(
             log_entry,
-            f"Successfully indexed {indexed_count} Obsidian notes (skipped {skipped_count})",
+            f"Successfully completed Obsidian vault indexing for connector {connector_id}",
             {
-                "indexed_count": indexed_count,
-                "skipped_count": skipped_count,
-                "total_files": len(files),
+                "notes_processed": total_processed,
+                "documents_indexed": indexed_count,
+                "documents_skipped": skipped_count,
+                "documents_failed": failed_count,
+                "duplicate_content_count": duplicate_content_count,
             },
         )
 
-        return indexed_count, None
+        logger.info(
+            f"Obsidian vault indexing completed: {indexed_count} ready, "
+            f"{skipped_count} skipped, {failed_count} failed "
+            f"({duplicate_content_count} duplicate content)"
+        )
+        return total_processed, warning_message
 
     except SQLAlchemyError as e:
         logger.exception(f"Database error during Obsidian indexing: {e}")
diff --git a/surfsense_backend/app/tasks/document_processors/base.py b/surfsense_backend/app/tasks/document_processors/base.py
index f29207448..c8046868c 100644
--- a/surfsense_backend/app/tasks/document_processors/base.py
+++ b/surfsense_backend/app/tasks/document_processors/base.py
@@ -14,6 +14,34 @@ from app.db import Document
 md = MarkdownifyTransformer()
 
 
+def safe_set_chunks(document: Document, chunks: list) -> None:
+    """
+    Assign ``chunks`` to ``document.chunks`` without loading the old collection.
+
+    Use this instead of ``document.chunks = chunks`` in async code paths:
+    - Plain assignment makes SQLAlchemy lazy-load the existing chunks first,
+      which raises MissingGreenlet / greenlet_spawn errors under asyncpg.
+    - ``set_committed_value`` stores the list directly on the attribute and
+      fires no history events, so no load is attempted, whether or not the
+      document was fetched with ``selectinload``.
+
+    NOTE(review): the value is installed as the attribute's *committed*
+    state, i.e. with no change history. Confirm that new chunks still get
+    flushed and that previously stored chunks are removed on re-index;
+    nothing in this function does either explicitly.
+
+    Args:
+        document: The Document whose ``chunks`` attribute is overwritten.
+        chunks: Chunk objects that become the document's chunk collection.
+
+    Example:
+        # Instead of: document.chunks = chunks
+        safe_set_chunks(document, chunks)
+    """
+    from sqlalchemy.orm.attributes import set_committed_value
+    set_committed_value(document, 'chunks', chunks)
+
+
 def get_current_timestamp() -> datetime:
     """
     Get the current timestamp with timezone for updated_at field.
diff --git a/surfsense_backend/app/tasks/document_processors/circleback_processor.py b/surfsense_backend/app/tasks/document_processors/circleback_processor.py
index f412b51dd..e9c395c83 100644
--- a/surfsense_backend/app/tasks/document_processors/circleback_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/circleback_processor.py
@@ -3,6 +3,11 @@ Circleback meeting document processor.
 
 This module processes meeting data received from Circleback webhooks
 and stores it as searchable documents in the database.
+
+Implements real-time document status updates for UI feedback:
+- Create document with 'pending' status (visible in UI immediately)
+- Set to 'processing' while processing content
+- Set to 'ready' or 'failed' when complete
 """
 
 import logging
@@ -14,6 +19,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.db import (
     Document,
+    DocumentStatus,
     DocumentType,
     SearchSourceConnector,
     SearchSourceConnectorType,
@@ -30,6 +36,7 @@ from app.utils.document_converters import (
 from .base import (
     check_document_by_unique_identifier,
     get_current_timestamp,
+    safe_set_chunks,
 )
 
 logger = logging.getLogger(__name__)
@@ -47,6 +54,11 @@ async def add_circleback_meeting_document(
     """
     Process and store a Circleback meeting document.
 
+    Implements real-time document status updates:
+    - Phase 1: Create document with 'pending' status (visible in UI immediately)
+    - Phase 2: Set to 'processing' while processing content
+    - Phase 3: Set to 'ready' or 'failed' when complete
+
     Args:
         session: Database session
         meeting_id: Circleback meeting ID
@@ -59,6 +71,7 @@ async def add_circleback_meeting_document(
     Returns:
         Document object if successful, None if failed or duplicate
     """
+    document = None
     try:
         # Generate unique identifier hash using Circleback meeting ID
         unique_identifier = f"circleback_{meeting_id}"
@@ -77,6 +90,10 @@ async def add_circleback_meeting_document(
         if existing_document:
             # Document exists - check if content has changed
             if existing_document.content_hash == content_hash:
+                # Ensure status is ready (might have been stuck in processing/pending)
+                if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                    existing_document.status = DocumentStatus.ready()
+                    await session.commit()
                 logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.")
                 return existing_document
             else:
@@ -84,7 +101,79 @@ async def add_circleback_meeting_document(
                 logger.info(
                     f"Content changed for Circleback meeting {meeting_id}. Updating document."
                 )
+                document = existing_document
+                # Set to PROCESSING status and commit - shows "processing" in UI
+                document.status = DocumentStatus.processing()
+                await session.commit()
+        else:
+            # =======================================================================
+            # PHASE 1: Create document with PENDING status
+            # This makes the document visible in the UI immediately
+            # =======================================================================
+            
+            # Fetch the user who set up the Circleback connector (preferred)
+            # or fall back to search space owner if no connector found
+            created_by_user_id = None
 
+            # Try to find the Circleback connector for this search space
+            connector_result = await session.execute(
+                select(SearchSourceConnector.user_id).where(
+                    SearchSourceConnector.search_space_id == search_space_id,
+                    SearchSourceConnector.connector_type
+                    == SearchSourceConnectorType.CIRCLEBACK_CONNECTOR,
+                )
+            )
+            connector_user = connector_result.scalar_one_or_none()
+
+            if connector_user:
+                # Use the user who set up the Circleback connector
+                created_by_user_id = connector_user
+            else:
+                # Fallback: use search space owner if no connector found
+                search_space_result = await session.execute(
+                    select(SearchSpace.user_id).where(SearchSpace.id == search_space_id)
+                )
+                created_by_user_id = search_space_result.scalar_one_or_none()
+
+            # Create new document with PENDING status (visible in UI immediately)
+            document = Document(
+                search_space_id=search_space_id,
+                title=meeting_name,
+                document_type=DocumentType.CIRCLEBACK,
+                document_metadata={
+                    "CIRCLEBACK_MEETING_ID": meeting_id,
+                    "MEETING_NAME": meeting_name,
+                    "SOURCE": "CIRCLEBACK_WEBHOOK",
+                    "connector_id": connector_id,
+                },
+                content="Pending...",  # Placeholder until processed
+                content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
+                unique_identifier_hash=unique_identifier_hash,
+                embedding=None,
+                chunks=[],  # Empty at creation - safe for async
+                status=DocumentStatus.pending(),  # Pending until processing starts
+                content_needs_reindexing=False,
+                updated_at=get_current_timestamp(),
+                created_by_id=created_by_user_id,
+                connector_id=connector_id,
+            )
+            session.add(document)
+            # Commit immediately so document appears in UI with pending status
+            await session.commit()
+            logger.info(
+                f"Created pending Circleback meeting document {meeting_id} in search space {search_space_id}"
+            )
+
+            # =======================================================================
+            # PHASE 2: Set to PROCESSING status
+            # =======================================================================
+            document.status = DocumentStatus.processing()
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 3: Process the document content
+        # =======================================================================
+        
         # Get LLM for generating summary
         llm = await get_document_summary_llm(session, search_space_id)
         if not llm:
@@ -100,7 +189,7 @@ async def add_circleback_meeting_document(
             summary_embedding = None
         else:
             # Generate summary with metadata
-            document_metadata = {
+            summary_metadata = {
                 "meeting_name": meeting_name,
                 "meeting_id": meeting_id,
                 "document_type": "Circleback Meeting",
@@ -111,7 +200,7 @@ async def add_circleback_meeting_document(
                 },
             }
             summary_content, summary_embedding = await generate_document_summary(
-                markdown_content, llm, document_metadata
+                markdown_content, llm, summary_metadata
             )
 
         # Process chunks
@@ -126,7 +215,7 @@ async def add_circleback_meeting_document(
                 f"Failed to convert Circleback meeting {meeting_id} to BlockNote JSON, document will not be editable"
             )
 
-        # Prepare document metadata
+        # Prepare final document metadata
         document_metadata = {
             "CIRCLEBACK_MEETING_ID": meeting_id,
             "MEETING_NAME": meeting_name,
@@ -134,77 +223,34 @@ async def add_circleback_meeting_document(
             **metadata,
         }
 
-        # Fetch the user who set up the Circleback connector (preferred)
-        # or fall back to search space owner if no connector found
-        created_by_user_id = None
+        # =======================================================================
+        # PHASE 4: Update document to READY status with actual content
+        # =======================================================================
+        document.title = meeting_name
+        document.content = summary_content
+        document.content_hash = content_hash
+        if summary_embedding is not None:
+            document.embedding = summary_embedding
+        document.document_metadata = document_metadata
+        safe_set_chunks(document, chunks)
+        document.blocknote_document = blocknote_json
+        document.content_needs_reindexing = False
+        document.updated_at = get_current_timestamp()
+        document.status = DocumentStatus.ready()
+        # Ensure connector_id is set (backfill for documents created before this field)
+        if connector_id is not None:
+            document.connector_id = connector_id
 
-        # Try to find the Circleback connector for this search space
-        connector_result = await session.execute(
-            select(SearchSourceConnector.user_id).where(
-                SearchSourceConnector.search_space_id == search_space_id,
-                SearchSourceConnector.connector_type
-                == SearchSourceConnectorType.CIRCLEBACK_CONNECTOR,
-            )
-        )
-        connector_user = connector_result.scalar_one_or_none()
-
-        if connector_user:
-            # Use the user who set up the Circleback connector
-            created_by_user_id = connector_user
-        else:
-            # Fallback: use search space owner if no connector found
-            search_space_result = await session.execute(
-                select(SearchSpace.user_id).where(SearchSpace.id == search_space_id)
-            )
-            created_by_user_id = search_space_result.scalar_one_or_none()
-
-        # Update or create document
+        await session.commit()
+        await session.refresh(document)
+        
         if existing_document:
-            # Update existing document
-            existing_document.title = meeting_name
-            existing_document.content = summary_content
-            existing_document.content_hash = content_hash
-            if summary_embedding is not None:
-                existing_document.embedding = summary_embedding
-            existing_document.document_metadata = document_metadata
-            existing_document.chunks = chunks
-            existing_document.blocknote_document = blocknote_json
-            existing_document.content_needs_reindexing = False
-            existing_document.updated_at = get_current_timestamp()
-            # Ensure connector_id is set (backfill for documents created before this field)
-            if connector_id is not None:
-                existing_document.connector_id = connector_id
-
-            await session.commit()
-            await session.refresh(existing_document)
-            document = existing_document
             logger.info(
                 f"Updated Circleback meeting document {meeting_id} in search space {search_space_id}"
             )
         else:
-            # Create new document
-            document = Document(
-                search_space_id=search_space_id,
-                title=meeting_name,
-                document_type=DocumentType.CIRCLEBACK,
-                document_metadata=document_metadata,
-                content=summary_content,
-                embedding=summary_embedding,
-                chunks=chunks,
-                content_hash=content_hash,
-                unique_identifier_hash=unique_identifier_hash,
-                blocknote_document=blocknote_json,
-                content_needs_reindexing=False,
-                updated_at=get_current_timestamp(),
-                created_by_id=created_by_user_id,
-                connector_id=connector_id,
-            )
-
-            session.add(document)
-            await session.commit()
-            await session.refresh(document)
             logger.info(
-                f"Created new Circleback meeting document {meeting_id} in search space {search_space_id}"
+                f"Processed Circleback meeting document {meeting_id} in search space {search_space_id} - now ready"
             )
 
         return document
@@ -214,8 +260,24 @@ async def add_circleback_meeting_document(
         logger.error(
             f"Database error processing Circleback meeting {meeting_id}: {db_error}"
         )
+        # Mark document as failed if it was created
+        if document is not None:
+            try:
+                document.status = DocumentStatus.failed(str(db_error))
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception as status_error:
+                logger.error(f"Failed to update document status to failed: {status_error}")
         raise db_error
     except Exception as e:
         await session.rollback()
         logger.error(f"Failed to process Circleback meeting {meeting_id}: {e!s}")
+        # Mark document as failed if it was created
+        if document is not None:
+            try:
+                document.status = DocumentStatus.failed(str(e))
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception as status_error:
+                logger.error(f"Failed to update document status to failed: {status_error}")
         raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e

From 5d2da0847eaf48ecb1374702699382f80877d069 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 04:54:29 +0530
Subject: [PATCH 28/36] refactor: update connector mutation atoms to handle
 optional searchSpaceId and improve query invalidation logic

---
 .../connectors/connector-mutation.atoms.ts    | 20 ++++---
 .../hooks/use-connector-dialog.ts             | 59 ++++++++-----------
 2 files changed, 36 insertions(+), 43 deletions(-)

diff --git a/surfsense_web/atoms/connectors/connector-mutation.atoms.ts b/surfsense_web/atoms/connectors/connector-mutation.atoms.ts
index 70b5b0322..b928f8631 100644
--- a/surfsense_web/atoms/connectors/connector-mutation.atoms.ts
+++ b/surfsense_web/atoms/connectors/connector-mutation.atoms.ts
@@ -1,5 +1,4 @@
 import { atomWithMutation } from "jotai-tanstack-query";
-import { toast } from "sonner";
 import type {
 	CreateConnectorRequest,
 	DeleteConnectorRequest,
@@ -17,15 +16,16 @@ export const createConnectorMutationAtom = atomWithMutation((get) => {
 	const searchSpaceId = get(activeSearchSpaceIdAtom);
 
 	return {
-		mutationKey: cacheKeys.connectors.all(searchSpaceId!),
+		mutationKey: cacheKeys.connectors.all(searchSpaceId ?? ""),
 		enabled: !!searchSpaceId,
 		mutationFn: async (request: CreateConnectorRequest) => {
 			return connectorsApiService.createConnector(request);
 		},
 
 		onSuccess: () => {
+			if (!searchSpaceId) return;
 			queryClient.invalidateQueries({
-				queryKey: cacheKeys.connectors.all(searchSpaceId!),
+				queryKey: cacheKeys.connectors.all(searchSpaceId),
 			});
 		},
 	};
@@ -35,15 +35,16 @@ export const updateConnectorMutationAtom = atomWithMutation((get) => {
 	const searchSpaceId = get(activeSearchSpaceIdAtom);
 
 	return {
-		mutationKey: cacheKeys.connectors.all(searchSpaceId!),
+		mutationKey: cacheKeys.connectors.all(searchSpaceId ?? ""),
 		enabled: !!searchSpaceId,
 		mutationFn: async (request: UpdateConnectorRequest) => {
 			return connectorsApiService.updateConnector(request);
 		},
 
 		onSuccess: (_, request: UpdateConnectorRequest) => {
+			if (!searchSpaceId) return;
 			queryClient.invalidateQueries({
-				queryKey: cacheKeys.connectors.all(searchSpaceId!),
+				queryKey: cacheKeys.connectors.all(searchSpaceId),
 			});
 			queryClient.invalidateQueries({
 				queryKey: cacheKeys.connectors.byId(String(request.id)),
@@ -56,15 +57,16 @@ export const deleteConnectorMutationAtom = atomWithMutation((get) => {
 	const searchSpaceId = get(activeSearchSpaceIdAtom);
 
 	return {
-		mutationKey: cacheKeys.connectors.all(searchSpaceId!),
+		mutationKey: cacheKeys.connectors.all(searchSpaceId ?? ""),
 		enabled: !!searchSpaceId,
 		mutationFn: async (request: DeleteConnectorRequest) => {
 			return connectorsApiService.deleteConnector(request);
 		},
 
 		onSuccess: (_, request: DeleteConnectorRequest) => {
+			if (!searchSpaceId) return;
 			queryClient.setQueryData(
-				cacheKeys.connectors.all(searchSpaceId!),
+				cacheKeys.connectors.all(searchSpaceId),
 				(oldData: GetConnectorsResponse | undefined) => {
 					if (!oldData) return oldData;
 					return oldData.filter((connector) => connector.id !== request.id);
@@ -88,9 +90,9 @@ export const indexConnectorMutationAtom = atomWithMutation((get) => {
 		},
 
 		onSuccess: (response: IndexConnectorResponse) => {
-			toast.success(response.message);
+			if (!searchSpaceId) return;
 			queryClient.invalidateQueries({
-				queryKey: cacheKeys.connectors.all(searchSpaceId!),
+				queryKey: cacheKeys.connectors.all(searchSpaceId),
 			});
 			queryClient.invalidateQueries({
 				queryKey: cacheKeys.connectors.byId(String(response.connector_id)),
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 118ca66ce..0ab333457 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -295,6 +295,7 @@ export const useConnectorDialog = () => {
 		connectingConnectorType,
 		viewingAccountsType,
 		viewingMCPList,
+		setIsOpen,
 	]);
 
 	// Detect OAuth success / Failure and transition to config view
@@ -345,12 +346,13 @@ export const useConnectorDialog = () => {
 						const connectorId = parseInt(params.connectorId, 10);
 						newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId);
 
-						// If we found the connector, find the matching OAuth/Composio connector by type
-						if (newConnector) {
-							oauthConnector =
-								OAUTH_CONNECTORS.find((c) => c.connectorType === newConnector!.connector_type) ||
-								COMPOSIO_CONNECTORS.find((c) => c.connectorType === newConnector!.connector_type);
-						}
+					// If we found the connector, find the matching OAuth/Composio connector by type
+					if (newConnector) {
+						const connectorType = newConnector.connector_type;
+						oauthConnector =
+							OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) ||
+							COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType);
+					}
 					}
 
 					// If we don't have a connector yet, try to find by connector param
@@ -359,11 +361,12 @@ export const useConnectorDialog = () => {
 							OAUTH_CONNECTORS.find((c) => c.id === params.connector) ||
 							COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);
 
-						if (oauthConnector) {
-							newConnector = result.data.find(
-								(c: SearchSourceConnector) => c.connector_type === oauthConnector!.connectorType
-							);
-						}
+					if (oauthConnector) {
+						const oauthConnectorType = oauthConnector.connectorType;
+						newConnector = result.data.find(
+							(c: SearchSourceConnector) => c.connector_type === oauthConnectorType
+						);
+					}
 					}
 
 					if (newConnector && oauthConnector) {
@@ -401,7 +404,7 @@ export const useConnectorDialog = () => {
 			// Invalid query params - log but don't crash
 			console.warn("Invalid connector popup query params in OAuth success handler:", error);
 		}
-	}, [searchParams, searchSpaceId, refetchAllConnectors]);
+	}, [searchParams, searchSpaceId, refetchAllConnectors, setIsOpen]);
 
 	// Handle OAuth connection
 	const handleConnectOAuth = useCallback(
@@ -516,7 +519,7 @@ export const useConnectorDialog = () => {
 		} finally {
 			setConnectingId(null);
 		}
-	}, [searchSpaceId, createConnector, refetchAllConnectors]);
+	}, [searchSpaceId, createConnector, refetchAllConnectors, setIsOpen]);
 
 	// Handle connecting non-OAuth connectors (like Tavily API)
 	const handleConnectNonOAuth = useCallback(
@@ -676,15 +679,11 @@ export const useConnectorDialog = () => {
 									},
 								});
 
-								const successMessage =
-									currentConnectorType === "MCP_CONNECTOR"
-										? `${connector.name} added successfully`
-										: `${connectorTitle} connected and indexing started!`;
-								toast.success(successMessage, {
-									description: periodicEnabledForIndexing
-										? `Periodic sync enabled every ${getFrequencyLabel(frequencyMinutesForIndexing)}.`
-										: "You can continue working while we sync your data.",
-								});
+							const successMessage =
+								currentConnectorType === "MCP_CONNECTOR"
+									? `${connector.name} added successfully`
+									: `${connectorTitle} connected and syncing started!`;
+							toast.success(successMessage);
 
 								const url = new URL(window.location.href);
 								url.searchParams.delete("modal");
@@ -784,7 +783,6 @@ export const useConnectorDialog = () => {
 			updateConnector,
 			indexConnector,
 			router,
-			getFrequencyLabel,
 		]
 	);
 
@@ -1012,11 +1010,7 @@ export const useConnectorDialog = () => {
 					);
 				}
 
-				toast.success(`${indexingConfig.connectorTitle} indexing started`, {
-					description: periodicEnabled
-						? `Periodic sync enabled every ${getFrequencyLabel(frequencyMinutes)}.`
-						: "You can continue working while we sync your data.",
-				});
+				toast.success(`${indexingConfig.connectorTitle} indexing started`);
 
 				// Update URL - the effect will handle closing the modal and clearing state
 				const url = new URL(window.location.href);
@@ -1047,7 +1041,6 @@ export const useConnectorDialog = () => {
 			updateConnector,
 			periodicEnabled,
 			frequencyMinutes,
-			getFrequencyLabel,
 			router,
 			indexingConnectorConfig,
 		]
@@ -1428,9 +1421,7 @@ export const useConnectorDialog = () => {
 						end_date: endDateStr,
 					},
 				});
-				toast.success("Indexing started", {
-					description: "You can continue working while we sync your data.",
-				});
+				toast.success("Indexing started");
 
 				// Invalidate queries to refresh data
 				queryClient.invalidateQueries({
@@ -1447,7 +1438,7 @@ export const useConnectorDialog = () => {
 				}
 			}
 		},
-		[searchSpaceId, indexConnector, queryClient]
+		[searchSpaceId, indexConnector]
 	);
 
 	// Handle going back from edit view
@@ -1529,7 +1520,7 @@ export const useConnectorDialog = () => {
 				}
 			}
 		},
-		[activeTab, isStartingIndexing, isDisconnecting, isSaving, isCreatingConnector]
+		[activeTab, isStartingIndexing, isDisconnecting, isSaving, isCreatingConnector, setIsOpen]
 	);
 
 	// Handle tab change

From cc1e796c1295b1650803c855fa603db1442fab14 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 04:54:50 +0530
Subject: [PATCH 29/36] feat: implement two-phase document indexing for
 webcrawler and YouTube video processors with real-time status updates

---
 .../connector_indexers/webcrawler_indexer.py  | 396 ++++++++++--------
 .../document_processors/youtube_processor.py  | 265 +++++++-----
 2 files changed, 375 insertions(+), 286 deletions(-)

diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
index cb11a6ec2..5d25b4623 100644
--- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
@@ -1,5 +1,9 @@
 """
 Webcrawler connector indexer.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+- Phase 2: Process each document: pending → processing → ready/failed
 """
 
 import time
@@ -11,7 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
 from app.connectors.webcrawler_connector import WebCrawlerConnector
-from app.db import Document, DocumentType, SearchSourceConnectorType
+from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -28,6 +32,7 @@ from .base import (
     get_connector_by_id,
     get_current_timestamp,
     logger,
+    safe_set_chunks,
     update_connector_last_indexed,
 )
 
@@ -49,7 +54,11 @@ async def index_crawled_urls(
     on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, str | None]:
     """
-    Index web page URLs.
+    Index web page URLs with real-time document status updates.
+
+    Implements a 2-phase approach for real-time UI feedback:
+    - Phase 1: Create all documents with 'pending' status (visible in UI immediately)
+    - Phase 2: Process each document: pending → processing → ready/failed
 
     Args:
         session: Database session
@@ -138,9 +147,9 @@ async def index_crawled_urls(
 
         await task_logger.log_task_progress(
             log_entry,
-            f"Starting to crawl {len(urls)} URLs",
+            f"Starting to process {len(urls)} URLs",
             {
-                "stage": "crawling",
+                "stage": "processing",
                 "total_urls": len(urls),
             },
         )
@@ -148,28 +157,118 @@ async def index_crawled_urls(
         documents_indexed = 0
         documents_updated = 0
         documents_skipped = 0
-        failed_urls = []
+        documents_failed = 0
+        duplicate_content_count = 0
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
 
-        for idx, url in enumerate(urls, 1):
-            # Check if it's time for a heartbeat update
-            if (
-                on_heartbeat_callback
-                and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS
-            ):
-                await on_heartbeat_callback(documents_indexed)
-                last_heartbeat_time = time.time()
+        # =======================================================================
+        # PHASE 1: Analyze all URLs, create pending documents for new ones
+        # This makes ALL new documents visible in the UI immediately with pending status
+        # =======================================================================
+        urls_to_process = []  # List of dicts with document and URL data
+        new_documents_created = False
+
+        for url in urls:
             try:
-                logger.info(f"Processing URL {idx}/{len(urls)}: {url}")
+                # Generate unique identifier hash for this URL
+                unique_identifier_hash = generate_unique_identifier_hash(
+                    DocumentType.CRAWLED_URL, url, search_space_id
+                )
+
+                # Check if document with this unique identifier already exists
+                existing_document = await check_document_by_unique_identifier(
+                    session, unique_identifier_hash
+                )
+
+                if existing_document:
+                    # Document exists - check if it's already being processed
+                    if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING):
+                        logger.info(f"URL {url} already pending. Skipping.")
+                        documents_skipped += 1
+                        continue
+                    if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING):
+                        logger.info(f"URL {url} already processing. Skipping.")
+                        documents_skipped += 1
+                        continue
+
+                    # Queue existing document for potential update check
+                    urls_to_process.append({
+                        'document': existing_document,
+                        'is_new': False,
+                        'url': url,
+                        'unique_identifier_hash': unique_identifier_hash,
+                    })
+                    continue
+
+                # Create new document with PENDING status (visible in UI immediately)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=url[:100],  # Placeholder - URL as title (truncated)
+                    document_type=DocumentType.CRAWLED_URL,
+                    document_metadata={
+                        "url": url,
+                        "connector_id": connector_id,
+                    },
+                    content="Pending crawl...",  # Placeholder content
+                    content_hash=unique_identifier_hash,  # Temporary unique placeholder; overwritten with the real content hash in Phase 2
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    chunks=[],  # Empty at creation - safe for async
+                    status=DocumentStatus.pending(),  # PENDING status - visible in UI
+                    updated_at=get_current_timestamp(),
+                    created_by_id=user_id,
+                    connector_id=connector_id,
+                )
+                session.add(document)
+                new_documents_created = True
+
+                urls_to_process.append({
+                    'document': document,
+                    'is_new': True,
+                    'url': url,
+                    'unique_identifier_hash': unique_identifier_hash,
+                })
+
+            except Exception as e:
+                logger.error(f"Error in Phase 1 for URL {url}: {e!s}", exc_info=True)
+                documents_failed += 1
+                continue
+
+        # Commit all pending documents - they all appear in UI now
+        if new_documents_created:
+            logger.info(f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents")
+            await session.commit()
+
+        # =======================================================================
+        # PHASE 2: Process each URL one by one
+        # Each document transitions: pending → processing → ready/failed
+        # =======================================================================
+        logger.info(f"Phase 2: Processing {len(urls_to_process)} URLs")
+
+        for item in urls_to_process:
+            # Send heartbeat periodically
+            if on_heartbeat_callback:
+                current_time = time.time()
+                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
+                    await on_heartbeat_callback(documents_indexed + documents_updated)
+                    last_heartbeat_time = current_time
+
+            document = item['document']
+            url = item['url']
+            is_new = item['is_new']
+
+            try:
+                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
+                document.status = DocumentStatus.processing()
+                await session.commit()
 
                 await task_logger.log_task_progress(
                     log_entry,
-                    f"Crawling URL {idx}/{len(urls)}: {url}",
+                    f"Crawling URL: {url}",
                     {
                         "stage": "crawling_url",
-                        "url_index": idx,
                         "url": url,
                     },
                 )
@@ -179,7 +278,10 @@ async def index_crawled_urls(
 
                 if error or not crawl_result:
                     logger.warning(f"Failed to crawl URL {url}: {error}")
-                    failed_urls.append((url, error or "Unknown error"))
+                    document.status = DocumentStatus.failed(error or "Crawl failed")
+                    document.updated_at = get_current_timestamp()
+                    await session.commit()
+                    documents_failed += 1
                     continue
 
                 # Extract content and metadata
@@ -189,23 +291,16 @@ async def index_crawled_urls(
 
                 if not content.strip():
                     logger.warning(f"Skipping URL with no content: {url}")
-                    failed_urls.append((url, "No content extracted"))
-                    documents_skipped += 1
+                    document.status = DocumentStatus.failed("No content extracted")
+                    document.updated_at = get_current_timestamp()
+                    await session.commit()
+                    documents_failed += 1
                     continue
 
-                # Format content as structured document for summary generation (includes all metadata)
-                structured_document = crawler.format_to_structured_document(
-                    crawl_result
-                )
-
-                # Generate unique identifier hash for this URL
-                unique_identifier_hash = generate_unique_identifier_hash(
-                    DocumentType.CRAWLED_URL, url, search_space_id
-                )
+                # Format content as structured document for summary generation
+                structured_document = crawler.format_to_structured_document(crawl_result)
 
                 # Generate content hash using a version WITHOUT metadata
-                # This ensures the hash only changes when actual content changes,
-                # not when metadata (which contains dynamic fields like timestamps, IDs, etc.) changes
                 structured_document_for_hash = crawler.format_to_structured_document(
                     crawl_result, exclude_metadata=True
                 )
@@ -213,114 +308,51 @@ async def index_crawled_urls(
                     structured_document_for_hash, search_space_id
                 )
 
-                # Check if document with this unique identifier already exists
-                existing_document = await check_document_by_unique_identifier(
-                    session, unique_identifier_hash
-                )
-
                 # Extract useful metadata
                 title = metadata.get("title", url)
                 description = metadata.get("description", "")
                 language = metadata.get("language", "")
 
-                if existing_document:
-                    # Document exists - check if content has changed
-                    if existing_document.content_hash == content_hash:
-                        logger.info(f"Document for URL {url} unchanged. Skipping.")
-                        documents_skipped += 1
-                        continue
-                    else:
-                        # Content has changed - update the existing document
-                        logger.info(
-                            f"Content changed for URL {url}. Updating document."
-                        )
+                # Update title immediately for better UX
+                document.title = title
+                await session.commit()
 
-                        # Generate summary with metadata
-                        user_llm = await get_user_long_context_llm(
-                            session, user_id, search_space_id
-                        )
-
-                        if user_llm:
-                            document_metadata = {
-                                "url": url,
-                                "title": title,
-                                "description": description,
-                                "language": language,
-                                "document_type": "Crawled URL",
-                                "crawler_type": crawler_type,
-                            }
-                            (
-                                summary_content,
-                                summary_embedding,
-                            ) = await generate_document_summary(
-                                structured_document, user_llm, document_metadata
-                            )
-                        else:
-                            # Fallback to simple summary if no LLM configured
-                            summary_content = f"Crawled URL: {title}\n\n"
-                            summary_content += f"URL: {url}\n"
-                            if description:
-                                summary_content += f"Description: {description}\n"
-                            if language:
-                                summary_content += f"Language: {language}\n"
-                            summary_content += f"Crawler: {crawler_type}\n\n"
-
-                            # Add content preview
-                            content_preview = content[:1000]
-                            if len(content) > 1000:
-                                content_preview += "..."
-                            summary_content += f"Content Preview:\n{content_preview}\n"
-
-                            summary_embedding = config.embedding_model_instance.embed(
-                                summary_content
-                            )
-
-                        # Process chunks
-                        chunks = await create_document_chunks(content)
-
-                        # Update existing document
-                        existing_document.title = title
-                        existing_document.content = summary_content
-                        existing_document.content_hash = content_hash
-                        existing_document.embedding = summary_embedding
-                        existing_document.document_metadata = {
-                            **metadata,
-                            "crawler_type": crawler_type,
-                            "last_crawled_at": datetime.now().strftime(
-                                "%Y-%m-%d %H:%M:%S"
-                            ),
-                        }
-                        existing_document.chunks = chunks
-                        existing_document.updated_at = get_current_timestamp()
-
-                        documents_updated += 1
-                        logger.info(f"Successfully updated URL {url}")
-                        continue
-
-                # Document doesn't exist by unique_identifier_hash
-                # Check if a document with the same content_hash exists (from another connector)
-                with session.no_autoflush:
-                    duplicate_by_content = await check_duplicate_document_by_hash(
-                        session, content_hash
-                    )
-
-                if duplicate_by_content:
-                    logger.info(
-                        f"URL {url} already indexed by another connector "
-                        f"(existing document ID: {duplicate_by_content.id}, "
-                        f"type: {duplicate_by_content.document_type}). Skipping."
-                    )
+                # For existing documents, check if content has changed
+                if not is_new and document.content_hash == content_hash:
+                    logger.info(f"Document for URL {url} unchanged. Marking as ready.")
+                    # Ensure status is ready (might have been stuck)
+                    document.status = DocumentStatus.ready()
+                    await session.commit()
                     documents_skipped += 1
                     continue
 
-                # Document doesn't exist - create new one
-                # Generate summary with metadata
+                # For new documents, check if duplicate content exists elsewhere
+                if is_new:
+                    with session.no_autoflush:
+                        duplicate_by_content = await check_duplicate_document_by_hash(
+                            session, content_hash
+                        )
+
+                    if duplicate_by_content:
+                        logger.info(
+                            f"URL {url} already indexed by another connector "
+                            f"(existing document ID: {duplicate_by_content.id}). "
+                            f"Marking as failed."
+                        )
+                        document.status = DocumentStatus.failed("Duplicate content exists")
+                        document.updated_at = get_current_timestamp()
+                        await session.commit()
+                        duplicate_content_count += 1
+                        documents_skipped += 1
+                        continue
+
+                # Generate summary with LLM
                 user_llm = await get_user_long_context_llm(
                     session, user_id, search_space_id
                 )
 
                 if user_llm:
-                    document_metadata = {
+                    document_metadata_for_summary = {
                         "url": url,
                         "title": title,
                         "description": description,
@@ -328,11 +360,8 @@ async def index_crawled_urls(
                         "document_type": "Crawled URL",
                         "crawler_type": crawler_type,
                     }
-                    (
-                        summary_content,
-                        summary_embedding,
-                    ) = await generate_document_summary(
-                        structured_document, user_llm, document_metadata
+                    summary_content, summary_embedding = await generate_document_summary(
+                        structured_document, user_llm, document_metadata_for_summary
                     )
                 else:
                     # Fallback to simple summary if no LLM configured
@@ -354,32 +383,32 @@ async def index_crawled_urls(
                         summary_content
                     )
 
+                # Process chunks
                 chunks = await create_document_chunks(content)
 
-                document = Document(
-                    search_space_id=search_space_id,
-                    title=title,
-                    document_type=DocumentType.CRAWLED_URL,
-                    document_metadata={
-                        **metadata,
-                        "crawler_type": crawler_type,
-                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                    },
-                    content=summary_content,
-                    content_hash=content_hash,
-                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
-                    updated_at=get_current_timestamp(),
-                    created_by_id=user_id,
-                    connector_id=connector_id,
-                )
+                # Update document to READY with actual content
+                document.title = title
+                document.content = summary_content
+                document.content_hash = content_hash
+                document.embedding = summary_embedding
+                document.document_metadata = {
+                    **metadata,
+                    "crawler_type": crawler_type,
+                    "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "connector_id": connector_id,
+                }
+                safe_set_chunks(document, chunks)
+                document.status = DocumentStatus.ready()  # READY status
+                document.updated_at = get_current_timestamp()
 
-                session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new URL {url}")
+                if is_new:
+                    documents_indexed += 1
+                else:
+                    documents_updated += 1
 
-                # Batch commit every 10 documents
+                logger.info(f"Successfully processed URL {url}")
+
+                # Batch commit every 10 documents (for ready status updates)
                 if (documents_indexed + documents_updated) % 10 == 0:
                     logger.info(
                         f"Committing batch: {documents_indexed + documents_updated} URLs processed so far"
@@ -387,32 +416,47 @@ async def index_crawled_urls(
                     await session.commit()
 
             except Exception as e:
-                logger.error(
-                    f"Error processing URL {url}: {e!s}",
-                    exc_info=True,
-                )
-                failed_urls.append((url, str(e)))
+                logger.error(f"Error processing URL {url}: {e!s}", exc_info=True)
+                # Mark document as failed with reason (visible in UI)
+                try:
+                    document.status = DocumentStatus.failed(str(e)[:200])
+                    document.updated_at = get_current_timestamp()
+                    await session.commit()
+                except Exception as status_error:
+                    logger.error(f"Failed to update document status to failed: {status_error}")
+                documents_failed += 1
                 continue
 
         total_processed = documents_indexed + documents_updated
 
-        if total_processed > 0:
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
+        await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
         logger.info(
             f"Final commit: Total {documents_indexed} new, {documents_updated} updated URLs processed"
         )
-        await session.commit()
+        try:
+            await session.commit()
+            logger.info("Successfully committed all webcrawler document changes to database")
+        except Exception as e:
+            # Handle any remaining integrity errors gracefully
+            if "duplicate key value violates unique constraint" in str(e).lower():
+                logger.warning(
+                    f"Duplicate content_hash detected during final commit. "
+                    f"Rolling back and continuing. Error: {e!s}"
+                )
+                await session.rollback()
+            else:
+                raise
 
-        # Log failed URLs if any (for debugging purposes)
-        if failed_urls:
-            failed_summary = "; ".join(
-                [f"{url}: {error}" for url, error in failed_urls[:5]]
-            )
-            if len(failed_urls) > 5:
-                failed_summary += f" (and {len(failed_urls) - 5} more)"
-            logger.warning(f"Some URLs failed to index: {failed_summary}")
+        # Build warning message if there were issues
+        warning_parts = []
+        if duplicate_content_count > 0:
+            warning_parts.append(f"{duplicate_content_count} duplicate")
+        if documents_failed > 0:
+            warning_parts.append(f"{documents_failed} failed")
+        warning_message = ", ".join(warning_parts) if warning_parts else None
 
         await task_logger.log_task_success(
             log_entry,
@@ -422,19 +466,21 @@ async def index_crawled_urls(
                 "documents_indexed": documents_indexed,
                 "documents_updated": documents_updated,
                 "documents_skipped": documents_skipped,
-                "failed_urls_count": len(failed_urls),
+                "documents_failed": documents_failed,
+                "duplicate_content_count": duplicate_content_count,
             },
         )
 
         logger.info(
             f"Web page indexing completed: {documents_indexed} new, "
             f"{documents_updated} updated, {documents_skipped} skipped, "
-            f"{len(failed_urls)} failed"
+            f"{documents_failed} failed"
         )
-        return (
-            total_processed,
-            None,
-        )  # Return None on success (result_message is for logging only)
+
+        if warning_message:
+            return total_processed, f"Completed with issues: {warning_message}"
+
+        return total_processed, None
 
     except SQLAlchemyError as db_error:
         await session.rollback()
@@ -482,9 +528,7 @@ async def get_crawled_url_documents(
     )
 
     if connector_id:
-        # Filter by connector if needed - you might need to add a connector_id field to Document
-        # or filter by some other means depending on your schema
-        pass
+        query = query.filter(Document.connector_id == connector_id)
 
     result = await session.execute(query)
     documents = result.scalars().all()
diff --git a/surfsense_backend/app/tasks/document_processors/youtube_processor.py b/surfsense_backend/app/tasks/document_processors/youtube_processor.py
index 7251fb22f..19092b592 100644
--- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py
@@ -1,5 +1,9 @@
 """
 YouTube video document processor.
+
+Implements 2-phase document status updates for real-time UI feedback:
+- Phase 1: Create document with 'pending' status (visible in UI immediately)
+- Phase 2: Process document: pending → processing → ready/failed
 """
 
 import logging
@@ -10,7 +14,7 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 from youtube_transcript_api import YouTubeTranscriptApi
 
-from app.db import Document, DocumentType
+from app.db import Document, DocumentStatus, DocumentType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
 from app.utils.document_converters import (
@@ -23,6 +27,7 @@ from app.utils.document_converters import (
 from .base import (
     check_document_by_unique_identifier,
     get_current_timestamp,
+    safe_set_chunks,
 )
 
 
@@ -58,6 +63,10 @@ async def add_youtube_video_document(
     """
     Process a YouTube video URL, extract transcripts, and store as a document.
 
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Create document with 'pending' status (visible in UI immediately)
+    - Phase 2: Process document: pending → processing → ready/failed
+
     Args:
         session: Database session for storing the document
         url: YouTube video URL (supports standard, shortened, and embed formats)
@@ -82,15 +91,18 @@ async def add_youtube_video_document(
         metadata={"url": url, "user_id": str(user_id)},
     )
 
+    document = None
+    video_id = None
+    is_new_document = False
+
     try:
-        # Extract video ID from URL
+        # Extract video ID from URL (lightweight operation)
         await task_logger.log_task_progress(
             log_entry,
             f"Extracting video ID from URL: {url}",
             {"stage": "video_id_extraction"},
         )
 
-        # Get video ID
         video_id = get_youtube_video_id(url)
         if not video_id:
             raise ValueError(f"Could not extract video ID from URL: {url}")
@@ -101,13 +113,79 @@ async def add_youtube_video_document(
             {"stage": "video_id_extracted", "video_id": video_id},
         )
 
-        # Get video metadata
+        # Generate unique identifier hash for this YouTube video
+        unique_identifier_hash = generate_unique_identifier_hash(
+            DocumentType.YOUTUBE_VIDEO, video_id, search_space_id
+        )
+
+        # Check if document with this unique identifier already exists
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Checking for existing video: {video_id}",
+            {"stage": "duplicate_check", "video_id": video_id},
+        )
+
+        existing_document = await check_document_by_unique_identifier(
+            session, unique_identifier_hash
+        )
+
+        # =======================================================================
+        # PHASE 1: Create pending document or prepare existing for update
+        # =======================================================================
+        if existing_document:
+            document = existing_document
+            is_new_document = False
+            # Check if already being processed
+            if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING):
+                logging.info(f"YouTube video {video_id} already pending. Returning existing.")
+                return existing_document
+            if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING):
+                logging.info(f"YouTube video {video_id} already processing. Returning existing.")
+                return existing_document
+        else:
+            # Create new document with PENDING status (visible in UI immediately)
+            await task_logger.log_task_progress(
+                log_entry,
+                f"Creating pending document for video: {video_id}",
+                {"stage": "pending_document_creation"},
+            )
+
+            document = Document(
+                title=f"YouTube Video: {video_id}",  # Placeholder title
+                document_type=DocumentType.YOUTUBE_VIDEO,
+                document_metadata={
+                    "url": url,
+                    "video_id": video_id,
+                },
+                content="Processing video...",  # Placeholder content
+                content_hash=unique_identifier_hash,  # Temporary unique value
+                unique_identifier_hash=unique_identifier_hash,
+                embedding=None,
+                chunks=[],  # Empty at creation
+                status=DocumentStatus.pending(),  # PENDING status - visible in UI
+                search_space_id=search_space_id,
+                updated_at=get_current_timestamp(),
+                created_by_id=user_id,
+            )
+            session.add(document)
+            await session.commit()  # Document visible in UI now with pending status!
+            is_new_document = True
+
+            logging.info(f"Created pending document for YouTube video {video_id}")
+
+        # =======================================================================
+        # PHASE 2: Set to PROCESSING and do heavy work
+        # =======================================================================
+        document.status = DocumentStatus.processing()
+        await session.commit()  # UI shows "processing" status
+
         await task_logger.log_task_progress(
             log_entry,
             f"Fetching video metadata for: {video_id}",
             {"stage": "metadata_fetch"},
         )
 
+        # Fetch video metadata
         params = {
             "format": "json",
             "url": f"https://www.youtube.com/watch?v={video_id}",
@@ -120,6 +198,10 @@ async def add_youtube_video_document(
         ):
             video_data = await response.json()
 
+        # Update title immediately for better UX (user sees actual title sooner)
+        document.title = video_data.get("title", f"YouTube Video: {video_id}")
+        await session.commit()
+
         await task_logger.log_task_progress(
             log_entry,
             f"Video metadata fetched: {video_data.get('title', 'Unknown')}",
@@ -204,53 +286,26 @@ async def add_youtube_video_document(
         document_parts.append("</DOCUMENT>")
         combined_document_string = "\n".join(document_parts)
 
-        # Generate unique identifier hash for this YouTube video
-        unique_identifier_hash = generate_unique_identifier_hash(
-            DocumentType.YOUTUBE_VIDEO, video_id, search_space_id
-        )
-
         # Generate content hash
         content_hash = generate_content_hash(combined_document_string, search_space_id)
 
-        # Check if document with this unique identifier already exists
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Checking for existing video: {video_id}",
-            {"stage": "duplicate_check", "video_id": video_id},
-        )
+        # For existing documents, check if content has changed
+        if not is_new_document and existing_document.content_hash == content_hash:
+            await task_logger.log_task_success(
+                log_entry,
+                f"YouTube video document unchanged: {video_data.get('title', 'YouTube Video')}",
+                {
+                    "duplicate_detected": True,
+                    "existing_document_id": existing_document.id,
+                    "video_id": video_id,
+                },
+            )
+            logging.info(f"Document for YouTube video {video_id} unchanged. Marking as ready.")
+            document.status = DocumentStatus.ready()
+            await session.commit()
+            return document
 
-        existing_document = await check_document_by_unique_identifier(
-            session, unique_identifier_hash
-        )
-
-        if existing_document:
-            # Document exists - check if content has changed
-            if existing_document.content_hash == content_hash:
-                await task_logger.log_task_success(
-                    log_entry,
-                    f"YouTube video document unchanged: {video_data.get('title', 'YouTube Video')}",
-                    {
-                        "duplicate_detected": True,
-                        "existing_document_id": existing_document.id,
-                        "video_id": video_id,
-                    },
-                )
-                logging.info(
-                    f"Document for YouTube video {video_id} unchanged. Skipping."
-                )
-                return existing_document
-            else:
-                # Content has changed - update the existing document
-                logging.info(
-                    f"Content changed for YouTube video {video_id}. Updating document."
-                )
-                await task_logger.log_task_progress(
-                    log_entry,
-                    f"Updating YouTube video document: {video_data.get('title', 'YouTube Video')}",
-                    {"stage": "document_update", "video_id": video_id},
-                )
-
-        # Get LLM for summary generation (needed for both create and update)
+        # Get LLM for summary generation
         await task_logger.log_task_progress(
             log_entry,
             f"Preparing for summary generation: {video_data.get('title', 'YouTube Video')}",
@@ -272,7 +327,7 @@ async def add_youtube_video_document(
         )
 
         # Generate summary with metadata
-        document_metadata = {
+        document_metadata_for_summary = {
             "url": url,
             "video_id": video_id,
             "title": video_data.get("title", "YouTube Video"),
@@ -282,7 +337,7 @@ async def add_youtube_video_document(
             "has_transcript": "No captions available" not in transcript_text,
         }
         summary_content, summary_embedding = await generate_document_summary(
-            combined_document_string, user_llm, document_metadata
+            combined_document_string, user_llm, document_metadata_for_summary
         )
 
         # Process chunks
@@ -304,65 +359,33 @@ async def add_youtube_video_document(
 
         chunks = await create_document_chunks(combined_document_string)
 
-        # Update or create document
-        if existing_document:
-            # Update existing document
-            await task_logger.log_task_progress(
-                log_entry,
-                f"Updating YouTube video document in database: {video_data.get('title', 'YouTube Video')}",
-                {"stage": "document_update", "chunks_count": len(chunks)},
-            )
+        # =======================================================================
+        # PHASE 2 (final step): Update document to READY with all content
+        # =======================================================================
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Finalizing document: {video_data.get('title', 'YouTube Video')}",
+            {"stage": "document_finalization", "chunks_count": len(chunks)},
+        )
 
-            existing_document.title = video_data.get("title", "YouTube Video")
-            existing_document.content = summary_content
-            existing_document.content_hash = content_hash
-            existing_document.embedding = summary_embedding
-            existing_document.document_metadata = {
-                "url": url,
-                "video_id": video_id,
-                "video_title": video_data.get("title", "YouTube Video"),
-                "author": video_data.get("author_name", "Unknown"),
-                "thumbnail": video_data.get("thumbnail_url", ""),
-            }
-            existing_document.chunks = chunks
-            existing_document.blocknote_document = blocknote_json
-            existing_document.updated_at = get_current_timestamp()
+        document.title = video_data.get("title", "YouTube Video")
+        document.content = summary_content
+        document.content_hash = content_hash
+        document.embedding = summary_embedding
+        document.document_metadata = {
+            "url": url,
+            "video_id": video_id,
+            "video_title": video_data.get("title", "YouTube Video"),
+            "author": video_data.get("author_name", "Unknown"),
+            "thumbnail": video_data.get("thumbnail_url", ""),
+        }
+        safe_set_chunks(document, chunks)
+        document.blocknote_document = blocknote_json
+        document.status = DocumentStatus.ready()  # READY status - fully processed
+        document.updated_at = get_current_timestamp()
 
-            await session.commit()
-            await session.refresh(existing_document)
-            document = existing_document
-        else:
-            # Create new document
-            await task_logger.log_task_progress(
-                log_entry,
-                f"Creating YouTube video document in database: {video_data.get('title', 'YouTube Video')}",
-                {"stage": "document_creation", "chunks_count": len(chunks)},
-            )
-
-            document = Document(
-                title=video_data.get("title", "YouTube Video"),
-                document_type=DocumentType.YOUTUBE_VIDEO,
-                document_metadata={
-                    "url": url,
-                    "video_id": video_id,
-                    "video_title": video_data.get("title", "YouTube Video"),
-                    "author": video_data.get("author_name", "Unknown"),
-                    "thumbnail": video_data.get("thumbnail_url", ""),
-                },
-                content=summary_content,
-                embedding=summary_embedding,
-                chunks=chunks,
-                search_space_id=search_space_id,
-                content_hash=content_hash,
-                unique_identifier_hash=unique_identifier_hash,
-                blocknote_document=blocknote_json,
-                updated_at=get_current_timestamp(),
-                created_by_id=user_id,
-            )
-
-            session.add(document)
-            await session.commit()
-            await session.refresh(document)
+        await session.commit()
+        await session.refresh(document)
 
         # Log success
         await task_logger.log_task_success(
@@ -380,27 +403,49 @@ async def add_youtube_video_document(
         )
 
         return document
+
     except SQLAlchemyError as db_error:
-        await session.rollback()
+        # Mark document as failed if it exists
+        if document:
+            try:
+                document.status = DocumentStatus.failed(f"Database error: {str(db_error)[:150]}")
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception:
+                await session.rollback()
+        else:
+            await session.rollback()
+
         await task_logger.log_task_failure(
             log_entry,
             f"Database error while processing YouTube video: {url}",
             str(db_error),
             {
                 "error_type": "SQLAlchemyError",
-                "video_id": video_id if "video_id" in locals() else None,
+                "video_id": video_id,
             },
         )
         raise db_error
+
     except Exception as e:
-        await session.rollback()
+        # Mark document as failed if it exists
+        if document:
+            try:
+                document.status = DocumentStatus.failed(str(e)[:200])
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+            except Exception:
+                await session.rollback()
+        else:
+            await session.rollback()
+
         await task_logger.log_task_failure(
             log_entry,
             f"Failed to process YouTube video: {url}",
             str(e),
             {
                 "error_type": type(e).__name__,
-                "video_id": video_id if "video_id" in locals() else None,
+                "video_id": video_id,
             },
         )
         logging.error(f"Failed to process YouTube video: {e!s}")

From f56f5a281e86d4dd1730e3f6b7f26b0937231c16 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 05:15:35 +0530
Subject: [PATCH 30/36] fix: disable Edit and Delete actions while processing
 in RowActions component

---
 .../documents/(manage)/components/RowActions.tsx  | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
index 4133f2960..4f23693ad 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@@ -53,7 +53,8 @@ export function RowActions({
 		document.document_type as (typeof NON_DELETABLE_DOCUMENT_TYPES)[number]
 	);
 
-	// Delete is disabled while processing
+	// Edit and Delete are disabled while processing
+	const isEditDisabled = isBeingProcessed;
 	const isDeleteDisabled = isBeingProcessed;
 
 	const handleDelete = async () => {
@@ -97,7 +98,11 @@ export function RowActions({
 							</Button>
 						</DropdownMenuTrigger>
 						<DropdownMenuContent align="end" className="w-40">
-							<DropdownMenuItem onClick={handleEdit}>
+							<DropdownMenuItem
+								onClick={() => !isEditDisabled && handleEdit()}
+								disabled={isEditDisabled}
+								className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""}
+							>
 								<Pencil className="mr-2 h-4 w-4" />
 								<span>Edit</span>
 							</DropdownMenuItem>
@@ -142,7 +147,11 @@ export function RowActions({
 							</Button>
 						</DropdownMenuTrigger>
 						<DropdownMenuContent align="end" className="w-40">
-							<DropdownMenuItem onClick={handleEdit}>
+							<DropdownMenuItem
+								onClick={() => !isEditDisabled && handleEdit()}
+								disabled={isEditDisabled}
+								className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""}
+							>
 								<Pencil className="mr-2 h-4 w-4" />
 								<span>Edit</span>
 							</DropdownMenuItem>

From ed2fc5c6365608ff1a003bc39357fe3a7c40c4bf Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 05:15:47 +0530
Subject: [PATCH 31/36] feat: enhance document upload process with two-phase
 indexing and real-time status updates

---
 .../app/routes/documents_routes.py            |  99 +++++-
 .../app/tasks/celery_tasks/document_tasks.py  | 292 ++++++++++++++++
 .../document_processors/file_processors.py    | 314 ++++++++++++++++++
 3 files changed, 694 insertions(+), 11 deletions(-)

diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py
index b905ebf91..00c80dcb5 100644
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@@ -113,9 +113,23 @@ async def create_documents_file_upload(
     user: User = Depends(current_active_user),
 ):
     """
-    Upload files as documents.
+    Upload files as documents with real-time status tracking.
+    
+    Implements 2-phase document status updates for real-time UI feedback:
+    - Phase 1: Create all documents with 'pending' status (visible in UI immediately via ElectricSQL)
+    - Phase 2: Celery processes each file: pending → processing → ready/failed
+    
     Requires DOCUMENTS_CREATE permission.
     """
+    from datetime import datetime
+
+    from app.db import DocumentStatus
+    from app.tasks.document_processors.base import (
+        check_document_by_unique_identifier,
+        get_current_timestamp,
+    )
+    from app.utils.document_converters import generate_unique_identifier_hash
+
     try:
         # Check permission
         await check_permission(
@@ -129,38 +143,101 @@ async def create_documents_file_upload(
         if not files:
             raise HTTPException(status_code=400, detail="No files provided")
 
+        created_documents: list[Document] = []
+        files_to_process: list[tuple[Document, str, str]] = []  # (document, temp_path, filename)
+        skipped_duplicates = 0
+
+        # ===== PHASE 1: Create pending documents for all files =====
+        # This makes ALL documents visible in the UI immediately with pending status
         for file in files:
             try:
-                # Save file to a temporary location to avoid stream issues
                 import os
                 import tempfile
 
-                # Create temp file
+                # Save file to temp location
                 with tempfile.NamedTemporaryFile(
-                    delete=False, suffix=os.path.splitext(file.filename)[1]
+                    delete=False, suffix=os.path.splitext(file.filename or "")[1]
                 ) as temp_file:
                     temp_path = temp_file.name
 
-                # Write uploaded file to temp file
                 content = await file.read()
                 with open(temp_path, "wb") as f:
                     f.write(content)
 
-                from app.tasks.celery_tasks.document_tasks import (
-                    process_file_upload_task,
+                file_size = len(content)
+
+                # Generate unique identifier for deduplication check
+                unique_identifier_hash = generate_unique_identifier_hash(
+                    DocumentType.FILE, file.filename or "unknown", search_space_id
                 )
 
-                process_file_upload_task.delay(
-                    temp_path, file.filename, search_space_id, str(user.id)
+                # Check if document already exists (by unique identifier)
+                existing = await check_document_by_unique_identifier(
+                    session, unique_identifier_hash
                 )
+                if existing:
+                    # Clean up temp file for duplicates
+                    os.unlink(temp_path)
+                    skipped_duplicates += 1
+                    continue
+
+                # Create pending document (visible immediately in UI via ElectricSQL)
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=file.filename or "Uploaded File",
+                    document_type=DocumentType.FILE,
+                    document_metadata={
+                        "FILE_NAME": file.filename,
+                        "file_size": file_size,
+                        "upload_time": datetime.now().isoformat(),
+                    },
+                    content="Processing...",  # Placeholder until processed
+                    content_hash=unique_identifier_hash,  # Temporary, updated when ready
+                    unique_identifier_hash=unique_identifier_hash,
+                    embedding=None,
+                    status=DocumentStatus.pending(),  # Shows "pending" in UI
+                    updated_at=get_current_timestamp(),
+                    created_by_id=str(user.id),
+                )
+                session.add(document)
+                created_documents.append(document)
+                files_to_process.append((document, temp_path, file.filename or "unknown"))
+
             except Exception as e:
                 raise HTTPException(
                     status_code=422,
                     detail=f"Failed to process file {file.filename}: {e!s}",
                 ) from e
 
-        await session.commit()
-        return {"message": "Files uploaded for processing"}
+        # Commit all pending documents - they appear in UI immediately via ElectricSQL
+        if created_documents:
+            await session.commit()
+            # Refresh to get generated IDs
+            for doc in created_documents:
+                await session.refresh(doc)
+
+        # ===== PHASE 2: Dispatch Celery tasks for each file =====
+        # Each task will update document status: pending → processing → ready/failed
+        from app.tasks.celery_tasks.document_tasks import (
+            process_file_upload_with_document_task,
+        )
+
+        for document, temp_path, filename in files_to_process:
+            process_file_upload_with_document_task.delay(
+                document_id=document.id,
+                temp_path=temp_path,
+                filename=filename,
+                search_space_id=search_space_id,
+                user_id=str(user.id),
+            )
+
+        return {
+            "message": "Files uploaded for processing",
+            "document_ids": [doc.id for doc in created_documents],
+            "total_files": len(files),
+            "pending_files": len(files_to_process),
+            "skipped_duplicates": skipped_duplicates,
+        }
     except HTTPException:
         raise
     except Exception as e:
diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
index f310bb03e..cd5537927 100644
--- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
@@ -537,6 +537,298 @@ async def _process_file_upload(
             raise
 
 
+@celery_app.task(name="process_file_upload_with_document", bind=True)
+def process_file_upload_with_document_task(
+    self,
+    document_id: int,
+    temp_path: str,
+    filename: str,
+    search_space_id: int,
+    user_id: str,
+):
+    """
+    Celery task to process uploaded file with existing pending document.
+    
+    This task is used by the 2-phase document upload flow:
+    - Phase 1 (API): Creates pending document (visible in UI immediately)
+    - Phase 2 (this task): Updates document status: pending → processing → ready/failed
+    
+    Args:
+        document_id: ID of the pending document created in Phase 1
+        temp_path: Path to the uploaded file
+        filename: Original filename
+        search_space_id: ID of the search space
+        user_id: ID of the user
+    """
+    import asyncio
+    import os
+    import traceback
+
+    logger.info(
+        f"[process_file_upload_with_document] Task started - document_id: {document_id}, "
+        f"file: {filename}, search_space_id: {search_space_id}"
+    )
+
+    # Check if file exists and is accessible
+    if not os.path.exists(temp_path):
+        logger.error(
+            f"[process_file_upload_with_document] File does not exist: {temp_path}. "
+            "The temp file may have been cleaned up before the task ran."
+        )
+        # Mark document as failed since file is missing
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        try:
+            loop.run_until_complete(
+                _mark_document_failed(
+                    document_id,
+                    "File not found - temp file may have been cleaned up",
+                )
+            )
+        finally:
+            loop.close()
+        return
+
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+    try:
+        loop.run_until_complete(
+            _process_file_with_document(
+                document_id, temp_path, filename, search_space_id, user_id
+            )
+        )
+        logger.info(
+            f"[process_file_upload_with_document] Task completed successfully for: {filename}"
+        )
+    except Exception as e:
+        logger.error(
+            f"[process_file_upload_with_document] Task failed for {filename}: {e}\n"
+            f"Traceback:\n{traceback.format_exc()}"
+        )
+        raise
+    finally:
+        loop.close()
+
+
+async def _mark_document_failed(document_id: int, reason: str):
+    """Mark a document as failed when task cannot proceed."""
+    from app.db import Document, DocumentStatus
+    from app.tasks.document_processors.base import get_current_timestamp
+
+    async with get_celery_session_maker()() as session:
+        document = await session.get(Document, document_id)
+        if document:
+            document.status = DocumentStatus.failed(reason)
+            document.updated_at = get_current_timestamp()
+            await session.commit()
+            logger.info(f"Marked document {document_id} as failed: {reason}")
+
+
+async def _process_file_with_document(
+    document_id: int,
+    temp_path: str,
+    filename: str,
+    search_space_id: int,
+    user_id: str,
+):
+    """
+    Process file and update existing pending document status.
+    
+    This function implements Phase 2 of the 2-phase document upload:
+    - Sets document status to 'processing' (shows spinner in UI)
+    - Processes the file (parsing, embedding, chunking)
+    - Updates document to 'ready' on success or 'failed' on error
+    """
+    import os
+
+    from app.db import Document, DocumentStatus
+    from app.tasks.document_processors.base import get_current_timestamp
+    from app.tasks.document_processors.file_processors import (
+        process_file_in_background_with_document,
+    )
+
+    logger.info(
+        f"[_process_file_with_document] Starting async processing for: {filename}"
+    )
+
+    async with get_celery_session_maker()() as session:
+        logger.info(
+            f"[_process_file_with_document] Database session created for: {filename}"
+        )
+        task_logger = TaskLoggingService(session, search_space_id)
+
+        # Get the document
+        document = await session.get(Document, document_id)
+        if not document:
+            logger.error(f"Document {document_id} not found")
+            return
+
+        # Get file size for notification metadata
+        try:
+            file_size = os.path.getsize(temp_path)
+            logger.info(f"[_process_file_with_document] File size: {file_size} bytes")
+        except Exception as e:
+            logger.warning(f"[_process_file_with_document] Could not get file size: {e}")
+            file_size = None
+
+        # Create notification for document processing
+        logger.info(f"[_process_file_with_document] Creating notification for: {filename}")
+        notification = (
+            await NotificationService.document_processing.notify_processing_started(
+                session=session,
+                user_id=UUID(user_id),
+                document_type="FILE",
+                document_name=filename,
+                search_space_id=search_space_id,
+                file_size=file_size,
+            )
+        )
+
+        log_entry = await task_logger.log_task_start(
+            task_name="process_file_upload_with_document",
+            source="document_processor",
+            message=f"Starting file processing for: {filename} (document_id: {document_id})",
+            metadata={
+                "document_type": "FILE",
+                "document_id": document_id,
+                "filename": filename,
+                "file_path": temp_path,
+                "user_id": user_id,
+            },
+        )
+
+        try:
+            # Set status to PROCESSING (shows spinner in UI via ElectricSQL)
+            document.status = DocumentStatus.processing()
+            await session.commit()
+            logger.info(
+                f"[_process_file_with_document] Document {document_id} status set to 'processing'"
+            )
+
+            # Process the file and update document
+            result = await process_file_in_background_with_document(
+                document=document,
+                file_path=temp_path,
+                filename=filename,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                session=session,
+                task_logger=task_logger,
+                log_entry=log_entry,
+                notification=notification,
+            )
+
+            # Update notification on success
+            if result:
+                await (
+                    NotificationService.document_processing.notify_processing_completed(
+                        session=session,
+                        notification=notification,
+                        document_id=result.id,
+                        chunks_count=None,
+                    )
+                )
+                logger.info(
+                    f"[_process_file_with_document] Successfully processed document {document_id}"
+                )
+            else:
+                # Duplicate detected - mark as failed
+                document.status = DocumentStatus.failed("Duplicate content detected")
+                document.updated_at = get_current_timestamp()
+                await session.commit()
+                await (
+                    NotificationService.document_processing.notify_processing_completed(
+                        session=session,
+                        notification=notification,
+                        error_message="Document already exists (duplicate)",
+                    )
+                )
+
+        except Exception as e:
+            # Import here to avoid circular dependencies
+            from fastapi import HTTPException
+
+            from app.services.page_limit_service import PageLimitExceededError
+
+            # Check if this is a page limit error
+            page_limit_error: PageLimitExceededError | None = None
+            if isinstance(e, PageLimitExceededError):
+                page_limit_error = e
+            elif (
+                isinstance(e, HTTPException)
+                and e.__cause__
+                and isinstance(e.__cause__, PageLimitExceededError)
+            ):
+                page_limit_error = e.__cause__
+
+            # Mark document as failed (shows error in UI via ElectricSQL)
+            error_message = str(e)[:500]
+            document.status = DocumentStatus.failed(error_message)
+            document.updated_at = get_current_timestamp()
+            await session.commit()
+            logger.info(
+                f"[_process_file_with_document] Document {document_id} marked as failed: {error_message[:100]}"
+            )
+
+            # Handle page limit errors with dedicated notification
+            if page_limit_error is not None:
+                try:
+                    await session.refresh(notification)
+                    await NotificationService.document_processing.notify_processing_completed(
+                        session=session,
+                        notification=notification,
+                        error_message="Page limit exceeded",
+                    )
+                    await NotificationService.page_limit.notify_page_limit_exceeded(
+                        session=session,
+                        user_id=UUID(user_id),
+                        document_name=filename,
+                        document_type="FILE",
+                        search_space_id=search_space_id,
+                        pages_used=page_limit_error.pages_used,
+                        pages_limit=page_limit_error.pages_limit,
+                        pages_to_add=page_limit_error.pages_to_add,
+                    )
+                except Exception as notif_error:
+                    logger.error(
+                        f"Failed to create page limit notification: {notif_error!s}"
+                    )
+            else:
+                # Update notification on failure
+                try:
+                    await session.refresh(notification)
+                    await NotificationService.document_processing.notify_processing_completed(
+                        session=session,
+                        notification=notification,
+                        error_message=str(e)[:100],
+                    )
+                except Exception as notif_error:
+                    logger.error(
+                        f"Failed to update notification on failure: {notif_error!s}"
+                    )
+
+            await task_logger.log_task_failure(
+                log_entry,
+                error_message[:100],
+                str(e),
+                {"error_type": type(e).__name__, "document_id": document_id},
+            )
+            logger.error(f"Error processing file {filename}: {e!s}")
+            raise
+
+        finally:
+            # Clean up temp file
+            if os.path.exists(temp_path):
+                try:
+                    os.unlink(temp_path)
+                    logger.info(f"[_process_file_with_document] Cleaned up temp file: {temp_path}")
+                except Exception as cleanup_error:
+                    logger.warning(
+                        f"[_process_file_with_document] Failed to clean up temp file: {cleanup_error}"
+                    )
+
+
 @celery_app.task(name="process_circleback_meeting", bind=True)
 def process_circleback_meeting_task(
     self,
diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py
index 4433cb11e..e14dc3f42 100644
--- a/surfsense_backend/app/tasks/document_processors/file_processors.py
+++ b/surfsense_backend/app/tasks/document_processors/file_processors.py
@@ -33,6 +33,7 @@ from .base import (
     check_document_by_unique_identifier,
     check_duplicate_document,
     get_current_timestamp,
+    safe_set_chunks,
 )
 from .markdown_processor import add_received_markdown_file_document
 
@@ -1612,3 +1613,316 @@ async def process_file_in_background(
 
         logging.error(f"Error processing file in background: {error_message}")
         raise  # Re-raise so the wrapper can also handle it
+
+
+async def process_file_in_background_with_document(
+    document: Document,
+    file_path: str,
+    filename: str,
+    search_space_id: int,
+    user_id: str,
+    session: AsyncSession,
+    task_logger: TaskLoggingService,
+    log_entry: Log,
+    connector: dict | None = None,
+    notification: Notification | None = None,
+) -> Document | None:
+    """
+    Process file and update existing pending document (2-phase pattern).
+    
+    This function is Phase 2 of the real-time document status updates:
+    - Phase 1 (API): Created document with pending status
+    - Phase 2 (this): Process file and update document to ready/failed
+    
+    The document already exists with pending status. This function:
+    1. Parses the file content (markdown, audio, or ETL services)
+    2. Updates the document with content, embeddings, and chunks
+    3. Sets status to 'ready' on success
+    
+    Args:
+        document: Existing document with pending status
+        file_path: Path to the uploaded file
+        filename: Original filename
+        search_space_id: ID of the search space
+        user_id: ID of the user
+        session: Database session
+        task_logger: Task logging service
+        log_entry: Log entry for this task
+        connector: Optional connector info for Google Drive files
+        notification: Optional notification for progress updates
+    
+    Returns:
+        Updated Document object if successful, None if duplicate content detected
+    """
+    import os
+
+    from app.config import config as app_config
+    from app.services.llm_service import get_user_long_context_llm
+    from app.utils.blocknote_converter import convert_markdown_to_blocknote
+
+    try:
+        markdown_content = None
+        etl_service = None
+
+        # ===== STEP 1: Parse file content based on type =====
+        
+        # Check if the file is a markdown or text file
+        if filename.lower().endswith((".md", ".markdown", ".txt")):
+            # Update notification: parsing stage
+            if notification:
+                await NotificationService.document_processing.notify_processing_progress(
+                    session, notification, stage="parsing", stage_message="Reading file"
+                )
+
+            await task_logger.log_task_progress(
+                log_entry,
+                f"Processing markdown/text file: {filename}",
+                {"file_type": "markdown", "processing_stage": "reading_file"},
+            )
+
+            # Read markdown content directly
+            with open(file_path, encoding="utf-8") as f:
+                markdown_content = f.read()
+            etl_service = "MARKDOWN"
+
+            # Clean up temp file
+            with contextlib.suppress(Exception):
+                os.unlink(file_path)
+
+        # Check if the file is an audio file
+        elif filename.lower().endswith(
+            (".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")
+        ):
+            # Update notification: parsing stage (transcription)
+            if notification:
+                await NotificationService.document_processing.notify_processing_progress(
+                    session, notification, stage="parsing", stage_message="Transcribing audio"
+                )
+
+            await task_logger.log_task_progress(
+                log_entry,
+                f"Processing audio file for transcription: {filename}",
+                {"file_type": "audio", "processing_stage": "starting_transcription"},
+            )
+
+            # Transcribe audio
+            stt_service_type = (
+                "local"
+                if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/")
+                else "external"
+            )
+
+            if stt_service_type == "local":
+                from app.services.stt_service import stt_service
+
+                result = stt_service.transcribe_file(file_path)
+                transcribed_text = result.get("text", "")
+                if not transcribed_text:
+                    raise ValueError("Transcription returned empty text")
+                markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}"
+            else:
+                with open(file_path, "rb") as audio_file:
+                    transcription_kwargs = {
+                        "model": app_config.STT_SERVICE,
+                        "file": audio_file,
+                        "api_key": app_config.STT_SERVICE_API_KEY,
+                    }
+                    if app_config.STT_SERVICE_API_BASE:
+                        transcription_kwargs["api_base"] = app_config.STT_SERVICE_API_BASE
+                    transcription_response = await atranscription(**transcription_kwargs)
+                    transcribed_text = transcription_response.get("text", "")
+                    if not transcribed_text:
+                        raise ValueError("Transcription returned empty text")
+                markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}"
+
+            etl_service = "AUDIO_TRANSCRIPTION"
+            # Clean up temp file
+            with contextlib.suppress(Exception):
+                os.unlink(file_path)
+
+        else:
+            # Document files - use ETL service
+            from app.services.page_limit_service import PageLimitExceededError, PageLimitService
+
+            page_limit_service = PageLimitService(session)
+
+            # Estimate page count
+            try:
+                estimated_pages = page_limit_service.estimate_pages_before_processing(file_path)
+            except Exception:
+                file_size = os.path.getsize(file_path)
+                estimated_pages = max(1, file_size // (80 * 1024))
+
+            # Check page limit
+            await page_limit_service.check_page_limit(user_id, estimated_pages)
+
+            if app_config.ETL_SERVICE == "UNSTRUCTURED":
+                if notification:
+                    await NotificationService.document_processing.notify_processing_progress(
+                        session, notification, stage="parsing", stage_message="Extracting content"
+                    )
+
+                from langchain_unstructured import UnstructuredLoader
+
+                loader = UnstructuredLoader(
+                    file_path, mode="elements", post_processors=[], languages=["eng"],
+                    include_orig_elements=False, include_metadata=False, strategy="auto"
+                )
+                docs = await loader.aload()
+                markdown_content = await convert_document_to_markdown(docs)
+                actual_pages = page_limit_service.estimate_pages_from_elements(docs)
+                final_page_count = max(estimated_pages, actual_pages)
+                etl_service = "UNSTRUCTURED"
+
+                # Update page usage
+                await page_limit_service.update_page_usage(user_id, final_page_count, allow_exceed=True)
+
+            elif app_config.ETL_SERVICE == "LLAMACLOUD":
+                if notification:
+                    await NotificationService.document_processing.notify_processing_progress(
+                        session, notification, stage="parsing", stage_message="Extracting content"
+                    )
+
+                result = await parse_with_llamacloud_retry(
+                    file_path=file_path, estimated_pages=estimated_pages,
+                    task_logger=task_logger, log_entry=log_entry
+                )
+                markdown_documents = await result.aget_markdown_documents(split_by_page=False)
+                if not markdown_documents:
+                    raise RuntimeError(f"LlamaCloud parsing returned no documents: {filename}")
+                markdown_content = markdown_documents[0].text
+                etl_service = "LLAMACLOUD"
+
+                # Update page usage
+                await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True)
+
+            elif app_config.ETL_SERVICE == "DOCLING":
+                if notification:
+                    await NotificationService.document_processing.notify_processing_progress(
+                        session, notification, stage="parsing", stage_message="Extracting content"
+                    )
+
+                # Suppress logging during Docling import
+                getLogger("docling.pipeline.base_pipeline").setLevel(ERROR)
+                getLogger("docling.document_converter").setLevel(ERROR)
+                getLogger("docling_core.transforms.chunker.hierarchical_chunker").setLevel(ERROR)
+
+                from docling.document_converter import DocumentConverter
+
+                converter = DocumentConverter()
+                result = converter.convert(file_path)
+                markdown_content = result.document.export_to_markdown()
+                etl_service = "DOCLING"
+
+                # Update page usage
+                await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True)
+
+            else:
+                raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}")
+
+            # Clean up temp file
+            with contextlib.suppress(Exception):
+                os.unlink(file_path)
+
+        if not markdown_content:
+            raise RuntimeError(f"Failed to extract content from file: {filename}")
+
+        # ===== STEP 2: Check for duplicate content =====
+        content_hash = generate_content_hash(markdown_content, search_space_id)
+        
+        existing_by_content = await check_duplicate_document(session, content_hash)
+        if existing_by_content and existing_by_content.id != document.id:
+            # Duplicate content found - mark this document as failed
+            logging.info(
+                f"Duplicate content detected for {filename}, "
+                f"matches document {existing_by_content.id}"
+            )
+            return None
+
+        # ===== STEP 3: Generate embeddings and chunks =====
+        if notification:
+            await NotificationService.document_processing.notify_processing_progress(
+                session, notification, stage="chunking"
+            )
+
+        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+        
+        if user_llm:
+            document_metadata = {
+                "file_name": filename,
+                "etl_service": etl_service,
+                "document_type": "File Document",
+            }
+            summary_content, summary_embedding = await generate_document_summary(
+                markdown_content, user_llm, document_metadata
+            )
+        else:
+            # Fallback: use truncated content as summary
+            summary_content = markdown_content[:4000]
+            from app.config import config
+
+            summary_embedding = config.embedding_model_instance.embed(summary_content)
+
+        chunks = await create_document_chunks(markdown_content)
+
+        # Convert to BlockNote for editing
+        blocknote_json = await convert_markdown_to_blocknote(markdown_content)
+
+        # ===== STEP 4: Update document to READY =====
+        from sqlalchemy.orm.attributes import flag_modified
+
+        document.title = filename
+        document.content = summary_content
+        document.content_hash = content_hash
+        document.embedding = summary_embedding
+        document.document_metadata = {
+            "FILE_NAME": filename,
+            "ETL_SERVICE": etl_service or "UNKNOWN",
+            **(document.document_metadata or {}),
+        }
+        flag_modified(document, "document_metadata")
+        
+        # Use safe_set_chunks to avoid async issues
+        safe_set_chunks(document, chunks)
+        
+        document.blocknote_document = blocknote_json
+        document.content_needs_reindexing = False
+        document.updated_at = get_current_timestamp()
+        document.status = DocumentStatus.ready()  # Shows checkmark in UI
+
+        await session.commit()
+        await session.refresh(document)
+
+        await task_logger.log_task_success(
+            log_entry,
+            f"Successfully processed file: {filename}",
+            {
+                "document_id": document.id,
+                "content_hash": content_hash,
+                "file_type": etl_service,
+                "chunks_count": len(chunks),
+            },
+        )
+
+        return document
+
+    except Exception as e:
+        await session.rollback()
+
+        from app.services.page_limit_service import PageLimitExceededError
+
+        if isinstance(e, PageLimitExceededError):
+            error_message = str(e)
+        elif isinstance(e, HTTPException) and "page limit" in str(e.detail).lower():
+            error_message = str(e.detail)
+        else:
+            error_message = f"Failed to process file: {filename}"
+
+        await task_logger.log_task_failure(
+            log_entry,
+            error_message,
+            str(e),
+            {"error_type": type(e).__name__, "filename": filename, "document_id": document.id},
+        )
+        logging.error(f"Error processing file with document: {error_message}")
+        raise

From 00a617ef179cba71a9dad23f8e2d1fdb4493aea6 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 05:31:45 +0530
Subject: [PATCH 32/36] feat: enhance stale notification cleanup task to mark
 associated documents as failed

---
 .../stale_notification_cleanup_task.py        | 142 +++++++++++++++---
 1 file changed, 124 insertions(+), 18 deletions(-)

diff --git a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py
index 9041655b0..ff7a11645 100644
--- a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py
+++ b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py
@@ -4,33 +4,41 @@ This task runs periodically (every 5 minutes by default) to find notifications
 that are stuck in "in_progress" status but don't have an active Redis heartbeat key.
 These are marked as "failed" to prevent the frontend from showing a perpetual "syncing" state.
 
+Additionally, it cleans up documents stuck in pending/processing state that belong
+to connectors with stale notifications.
+
 Detection mechanism:
 - Active indexing tasks set a Redis key with TTL (2 minutes) as a heartbeat
 - If the task crashes, the Redis key expires automatically
 - This cleanup task checks for in-progress notifications without a Redis heartbeat key
 - Such notifications are marked as failed with O(1) batch UPDATE
+- Documents with pending/processing status for those connectors are also marked as failed
 """
 
+import contextlib
 import json
 import logging
 import os
 from datetime import UTC, datetime
 
 import redis
-from sqlalchemy import and_, text
+from sqlalchemy import and_, or_, text
 from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
 from sqlalchemy.future import select
 from sqlalchemy.pool import NullPool
 
 from app.celery_app import celery_app
 from app.config import config
-from app.db import Notification
+from app.db import Document, DocumentStatus, Notification
 
 logger = logging.getLogger(__name__)
 
 # Redis client for checking heartbeats
 _redis_client: redis.Redis | None = None
 
+# Error message shown to users when sync is interrupted
+STALE_SYNC_ERROR_MESSAGE = "Sync was interrupted unexpectedly. Please retry."
+
 
 def get_redis_client() -> redis.Redis:
     """Get or create Redis client for heartbeat checking."""
@@ -70,6 +78,7 @@ def cleanup_stale_indexing_notifications_task():
     - Do NOT have a corresponding Redis heartbeat key (meaning task crashed)
 
     And marks them as failed with O(1) batch UPDATE.
+    Also marks associated pending/processing documents as failed.
     """
     import asyncio
 
@@ -86,15 +95,20 @@ async def _cleanup_stale_notifications():
     """Find and mark stale connector indexing notifications as failed.
 
     Uses Redis TTL-based detection:
-    1. Find all in-progress notifications
+    1. Find all in-progress notifications with their connector_id
     2. Check which ones are missing their Redis heartbeat key
     3. Mark those as failed with O(1) batch UPDATE using JSONB || operator
+    4. Mark associated documents (pending/processing) as failed
     """
     async with get_celery_session_maker()() as session:
         try:
             # Find all in-progress connector indexing notifications
+            # Fetch full metadata to properly extract connector_id
             result = await session.execute(
-                select(Notification.id).where(
+                select(
+                    Notification.id,
+                    Notification.notification_metadata,
+                ).where(
                     and_(
                         Notification.type == "connector_indexing",
                         Notification.notification_metadata["status"].astext
@@ -102,24 +116,37 @@ async def _cleanup_stale_notifications():
                     )
                 )
             )
-            in_progress_ids = [row[0] for row in result.fetchall()]
+            in_progress_rows = result.fetchall()
 
-            if not in_progress_ids:
+            if not in_progress_rows:
                 logger.debug("No in-progress connector indexing notifications found")
                 return
 
             # Check which ones are missing heartbeat keys in Redis
             redis_client = get_redis_client()
             stale_notification_ids = []
+            stale_connector_ids = []
 
-            for notification_id in in_progress_ids:
+            for row in in_progress_rows:
+                notification_id = row[0]
+                metadata = row[1]  # Full metadata dict
                 heartbeat_key = _get_heartbeat_key(notification_id)
                 if not redis_client.exists(heartbeat_key):
                     stale_notification_ids.append(notification_id)
+                    # Extract connector_id from metadata dict for document cleanup
+                    if metadata and isinstance(metadata, dict):
+                        connector_id = metadata.get("connector_id")
+                        logger.debug(
+                            f"Notification {notification_id} metadata: {metadata}, "
+                            f"connector_id: {connector_id}"
+                        )
+                        if connector_id is not None:
+                            with contextlib.suppress(ValueError, TypeError):
+                                stale_connector_ids.append(int(connector_id))
 
             if not stale_notification_ids:
                 logger.debug(
-                    f"All {len(in_progress_ids)} in-progress notifications have active Redis heartbeats"
+                    f"All {len(in_progress_rows)} in-progress notifications have active Redis heartbeats"
                 )
                 return
 
@@ -127,18 +154,17 @@ async def _cleanup_stale_notifications():
                 f"Found {len(stale_notification_ids)} stale connector indexing notifications "
                 f"(no Redis heartbeat key): {stale_notification_ids}"
             )
-
-            # O(1) Batch UPDATE using JSONB || operator
-            # This merges the update data into existing notification_metadata
-            # Also updates title and message for proper UI display
-            error_message = (
-                "Something went wrong while syncing your content. Please retry."
+            logger.info(
+                f"Connector IDs for document cleanup: {stale_connector_ids}"
             )
 
+            # O(1) Batch UPDATE notifications using JSONB || operator
+            # This merges the update data into existing notification_metadata
+            # Also updates title and message for proper UI display
             update_data = {
                 "status": "failed",
                 "completed_at": datetime.now(UTC).isoformat(),
-                "error_message": error_message,
+                "error_message": STALE_SYNC_ERROR_MESSAGE,
                 "sync_stage": "failed",
             }
 
@@ -152,16 +178,96 @@ async def _cleanup_stale_notifications():
                 """),
                 {
                     "update_json": json.dumps(update_data),
-                    "display_message": f"{error_message}",
+                    "display_message": STALE_SYNC_ERROR_MESSAGE,
                     "ids": stale_notification_ids,
                 },
             )
 
-            await session.commit()
             logger.info(
-                f"Successfully marked {len(stale_notification_ids)} stale notifications as failed (batch UPDATE)"
+                f"Successfully marked {len(stale_notification_ids)} stale notifications as failed"
             )
 
+            # ===== Clean up stuck documents for stale connectors =====
+            if stale_connector_ids:
+                await _cleanup_stuck_documents(session, stale_connector_ids)
+
+            await session.commit()
+
         except Exception as e:
             logger.error(f"Error cleaning up stale notifications: {e!s}", exc_info=True)
             await session.rollback()
+
+
+async def _cleanup_stuck_documents(session, connector_ids: list[int]):
+    """
+    Mark documents stuck in pending/processing state as failed for given connectors.
+
+    This ensures that when a connector sync is interrupted, all partially-processed
+    documents are marked with a clear error state instead of being stuck indefinitely.
+
+    Args:
+        session: Database session
+        connector_ids: List of connector IDs whose documents should be cleaned up
+    """
+    if not connector_ids:
+        return
+
+    try:
+        # Fetch the IDs of documents that will be affected (used for logging and the early exit below)
+        count_result = await session.execute(
+            select(Document.id).where(
+                and_(
+                    Document.connector_id.in_(connector_ids),
+                    or_(
+                        Document.status["state"].astext == DocumentStatus.PENDING,
+                        Document.status["state"].astext == DocumentStatus.PROCESSING,
+                    ),
+                )
+            )
+        )
+        stuck_doc_ids = [row[0] for row in count_result.fetchall()]
+
+        if not stuck_doc_ids:
+            logger.debug(f"No stuck documents found for connector IDs: {connector_ids}")
+            return
+
+        logger.warning(
+            f"Found {len(stuck_doc_ids)} stuck documents (pending/processing) "
+            f"for connector IDs {connector_ids}: {stuck_doc_ids[:20]}..."  # Log first 20
+        )
+
+        # Single batch UPDATE: mark all stuck documents as failed by overwriting the JSONB status
+        # The error message matches what we show in notifications
+        failed_status = DocumentStatus.failed(STALE_SYNC_ERROR_MESSAGE)
+
+        await session.execute(
+            text("""
+                UPDATE documents 
+                SET status = CAST(:failed_status AS jsonb),
+                    updated_at = :now
+                WHERE connector_id = ANY(:connector_ids)
+                  AND (
+                      status->>'state' = :pending_state
+                      OR status->>'state' = :processing_state
+                  )
+            """),
+            {
+                "failed_status": json.dumps(failed_status),
+                "now": datetime.now(UTC),
+                "connector_ids": connector_ids,
+                "pending_state": DocumentStatus.PENDING,
+                "processing_state": DocumentStatus.PROCESSING,
+            },
+        )
+
+        logger.info(
+            f"Successfully marked {len(stuck_doc_ids)} stuck documents as failed "
+            f"for connector IDs: {connector_ids}"
+        )
+
+    except Exception as e:
+        logger.error(
+            f"Error cleaning up stuck documents for connectors {connector_ids}: {e!s}",
+            exc_info=True,
+        )
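+        # Don't raise - let the notification cleanup continue even if document cleanup fails. NOTE(review): on PostgreSQL a failed statement aborts the enclosing transaction, so the caller's commit may not persist the notification update; consider a SAVEPOINT (session.begin_nested()) - confirm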

From aa66928154aacb1e2f8a0fdc4cdc4679d9d2d0b0 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 05:35:15 +0530
Subject: [PATCH 33/36] chore: ran linting

---
 .../versions/92_add_document_status_column.py |   3 -
 .../connectors/composio_gmail_connector.py    | 104 +--
 .../composio_google_calendar_connector.py     |  99 ++-
 .../composio_google_drive_connector.py        | 164 ++--
 surfsense_backend/app/db.py                   |  29 +-
 .../app/routes/documents_routes.py            |  24 +-
 surfsense_backend/app/schemas/documents.py    |   9 +-
 .../app/services/connector_service.py         |  12 +-
 .../app/tasks/celery_tasks/document_tasks.py  |  18 +-
 .../stale_notification_cleanup_task.py        |   4 +-
 .../connector_indexers/airtable_indexer.py    | 105 ++-
 .../app/tasks/connector_indexers/base.py      |  13 +-
 .../connector_indexers/bookstack_indexer.py   | 116 +--
 .../connector_indexers/clickup_indexer.py     | 124 +--
 .../connector_indexers/confluence_indexer.py  |  96 ++-
 .../connector_indexers/discord_indexer.py     | 100 ++-
 .../elasticsearch_indexer.py                  |  90 +-
 .../connector_indexers/github_indexer.py      |  74 +-
 .../google_calendar_indexer.py                | 119 +--
 .../google_drive_indexer.py                   |  67 +-
 .../google_gmail_indexer.py                   | 101 ++-
 .../tasks/connector_indexers/jira_indexer.py  |  94 ++-
 .../connector_indexers/linear_indexer.py      | 107 ++-
 .../tasks/connector_indexers/luma_indexer.py  | 148 ++--
 .../connector_indexers/notion_indexer.py      |  73 +-
 .../connector_indexers/obsidian_indexer.py    |  96 ++-
 .../tasks/connector_indexers/slack_indexer.py |  86 +-
 .../tasks/connector_indexers/teams_indexer.py |  98 ++-
 .../connector_indexers/webcrawler_indexer.py  |  67 +-
 .../app/tasks/document_processors/base.py     |  13 +-
 .../circleback_processor.py                   |  18 +-
 .../document_processors/file_processors.py    | 124 ++-
 .../document_processors/youtube_processor.py  |  24 +-
 .../(manage)/components/DocumentTypeIcon.tsx  |   4 +-
 .../(manage)/components/DocumentsFilters.tsx  | 192 +++--
 .../components/DocumentsTableShell.tsx        |  82 +-
 .../(manage)/components/RowActions.tsx        |  34 +-
 .../documents/(manage)/page.tsx               |  68 +-
 .../connector-dialog.atoms.ts                 |   1 -
 .../assistant-ui/connector-popup.tsx          |   4 +-
 .../hooks/use-connector-dialog.ts             |  36 +-
 .../components/theme/theme-toggle.tsx         | 787 +++++++++---------
 surfsense_web/hooks/use-documents.ts          |  31 +-
 surfsense_web/lib/electric/client.ts          |  25 +-
 44 files changed, 2025 insertions(+), 1658 deletions(-)

diff --git a/surfsense_backend/alembic/versions/92_add_document_status_column.py b/surfsense_backend/alembic/versions/92_add_document_status_column.py
index 550faa3c3..8204096aa 100644
--- a/surfsense_backend/alembic/versions/92_add_document_status_column.py
+++ b/surfsense_backend/alembic/versions/92_add_document_status_column.py
@@ -13,8 +13,6 @@ Changes:
 
 from collections.abc import Sequence
 
-import sqlalchemy as sa
-
 from alembic import op
 
 # revision identifiers, used by Alembic.
@@ -77,4 +75,3 @@ def downgrade() -> None:
         END$$;
         """
     )
-
diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py
index 870053c7f..4764a0a41 100644
--- a/surfsense_backend/app/connectors/composio_gmail_connector.py
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@@ -285,24 +285,28 @@ async def _analyze_gmail_messages_phase1(
             if existing_document:
                 if existing_document.content_hash == content_hash:
                     # Ensure status is ready (might have been stuck in processing/pending)
-                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                    if not DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.READY
+                    ):
                         existing_document.status = DocumentStatus.ready()
                     documents_skipped += 1
                     continue
 
                 # Queue existing document for update (will be set to processing in Phase 2)
-                messages_to_process.append({
-                    'document': existing_document,
-                    'is_new': False,
-                    'markdown_content': markdown_content,
-                    'content_hash': content_hash,
-                    'message_id': message_id,
-                    'thread_id': thread_id,
-                    'subject': subject,
-                    'sender': sender,
-                    'date_str': date_str,
-                    'label_ids': label_ids,
-                })
+                messages_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "message_id": message_id,
+                        "thread_id": thread_id,
+                        "subject": subject,
+                        "sender": sender,
+                        "date_str": date_str,
+                        "label_ids": label_ids,
+                    }
+                )
                 continue
 
             # Document doesn't exist by unique_identifier_hash
@@ -350,18 +354,20 @@ async def _analyze_gmail_messages_phase1(
             )
             session.add(document)
 
-            messages_to_process.append({
-                'document': document,
-                'is_new': True,
-                'markdown_content': markdown_content,
-                'content_hash': content_hash,
-                'message_id': message_id,
-                'thread_id': thread_id,
-                'subject': subject,
-                'sender': sender,
-                'date_str': date_str,
-                'label_ids': label_ids,
-            })
+            messages_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "markdown_content": markdown_content,
+                    "content_hash": content_hash,
+                    "message_id": message_id,
+                    "thread_id": thread_id,
+                    "subject": subject,
+                    "sender": sender,
+                    "date_str": date_str,
+                    "label_ids": label_ids,
+                }
+            )
 
         except Exception as e:
             logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
@@ -398,7 +404,7 @@ async def _process_gmail_messages_phase2(
                 await on_heartbeat_callback(documents_indexed)
                 last_heartbeat_time = current_time
 
-        document = item['document']
+        document = item["document"]
         try:
             # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
             document.status = DocumentStatus.processing()
@@ -411,37 +417,35 @@ async def _process_gmail_messages_phase2(
 
             if user_llm:
                 document_metadata_for_summary = {
-                    "message_id": item['message_id'],
-                    "thread_id": item['thread_id'],
-                    "subject": item['subject'],
-                    "sender": item['sender'],
+                    "message_id": item["message_id"],
+                    "thread_id": item["thread_id"],
+                    "subject": item["subject"],
+                    "sender": item["sender"],
                     "document_type": "Gmail Message (Composio)",
                 }
                 summary_content, summary_embedding = await generate_document_summary(
-                    item['markdown_content'], user_llm, document_metadata_for_summary
+                    item["markdown_content"], user_llm, document_metadata_for_summary
                 )
             else:
-                summary_content = (
-                    f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
-                )
+                summary_content = f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
                 summary_embedding = config.embedding_model_instance.embed(
                     summary_content
                 )
 
-            chunks = await create_document_chunks(item['markdown_content'])
+            chunks = await create_document_chunks(item["markdown_content"])
 
             # Update document to READY with actual content
-            document.title = item['subject']
+            document.title = item["subject"]
             document.content = summary_content
-            document.content_hash = item['content_hash']
+            document.content_hash = item["content_hash"]
             document.embedding = summary_embedding
             document.document_metadata = {
-                "message_id": item['message_id'],
-                "thread_id": item['thread_id'],
-                "subject": item['subject'],
-                "sender": item['sender'],
-                "date": item['date_str'],
-                "labels": item['label_ids'],
+                "message_id": item["message_id"],
+                "thread_id": item["thread_id"],
+                "subject": item["subject"],
+                "sender": item["sender"],
+                "date": item["date_str"],
+                "labels": item["label_ids"],
                 "connector_id": connector_id,
                 "source": "composio",
             }
@@ -465,7 +469,9 @@ async def _process_gmail_messages_phase2(
                 document.status = DocumentStatus.failed(str(e))
                 document.updated_at = get_current_timestamp()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
             documents_failed += 1
             continue
 
@@ -571,7 +577,9 @@ async def index_composio_gmail(
                 )
 
             all_messages.extend(messages)
-            logger.info(f"Fetched {len(messages)} messages (total: {len(all_messages)})")
+            logger.info(
+                f"Fetched {len(messages)} messages (total: {len(all_messages)})"
+            )
 
             if not next_token or len(messages) < current_batch_size:
                 break
@@ -616,7 +624,7 @@ async def index_composio_gmail(
         )
 
         # Commit all pending documents - they all appear in UI now
-        new_documents_count = len([m for m in messages_to_process if m['is_new']])
+        new_documents_count = len([m for m in messages_to_process if m["is_new"]])
         if new_documents_count > 0:
             logger.info(f"Phase 1: Committing {new_documents_count} pending documents")
             await session.commit()
@@ -645,9 +653,7 @@ async def index_composio_gmail(
         await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit to ensure all documents are persisted
-        logger.info(
-            f"Final commit: Total {documents_indexed} Gmail messages processed"
-        )
+        logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed")
         try:
             await session.commit()
             logger.info(
diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
index dc9c18c99..6593721a1 100644
--- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py
@@ -268,7 +268,9 @@ async def index_composio_google_calendar(
         documents_indexed = 0
         documents_skipped = 0
         documents_failed = 0  # Track events that failed processing
-        duplicate_content_count = 0  # Track events skipped due to duplicate content_hash
+        duplicate_content_count = (
+            0  # Track events skipped due to duplicate content_hash
+        )
         last_heartbeat_time = time.time()
 
         # =======================================================================
@@ -317,23 +319,27 @@ async def index_composio_google_calendar(
                 if existing_document:
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    events_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'markdown_content': markdown_content,
-                        'content_hash': content_hash,
-                        'event_id': event_id,
-                        'summary': summary,
-                        'start_time': start_time,
-                        'end_time': end_time,
-                        'location': location,
-                    })
+                    events_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "markdown_content": markdown_content,
+                            "content_hash": content_hash,
+                            "event_id": event_id,
+                            "summary": summary,
+                            "start_time": start_time,
+                            "end_time": end_time,
+                            "location": location,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -383,17 +389,19 @@ async def index_composio_google_calendar(
                 session.add(document)
                 new_documents_created = True
 
-                events_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'markdown_content': markdown_content,
-                    'content_hash': content_hash,
-                    'event_id': event_id,
-                    'summary': summary,
-                    'start_time': start_time,
-                    'end_time': end_time,
-                    'location': location,
-                })
+                events_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "event_id": event_id,
+                        "summary": summary,
+                        "start_time": start_time,
+                        "end_time": end_time,
+                        "location": location,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@@ -402,7 +410,9 @@ async def index_composio_google_calendar(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -419,7 +429,7 @@ async def index_composio_google_calendar(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -432,35 +442,40 @@ async def index_composio_google_calendar(
 
                 if user_llm:
                     document_metadata_for_summary = {
-                        "event_id": item['event_id'],
-                        "summary": item['summary'],
-                        "start_time": item['start_time'],
+                        "event_id": item["event_id"],
+                        "summary": item["summary"],
+                        "start_time": item["start_time"],
                         "document_type": "Google Calendar Event (Composio)",
                     }
-                    summary_content, summary_embedding = await generate_document_summary(
-                        item['markdown_content'], user_llm, document_metadata_for_summary
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["markdown_content"],
+                        user_llm,
+                        document_metadata_for_summary,
                     )
                 else:
                     summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}"
-                    if item['location']:
+                    if item["location"]:
                         summary_content += f"\nLocation: {item['location']}"
                     summary_embedding = config.embedding_model_instance.embed(
                         summary_content
                     )
 
-                chunks = await create_document_chunks(item['markdown_content'])
+                chunks = await create_document_chunks(item["markdown_content"])
 
                 # Update document to READY with actual content
-                document.title = item['summary']
+                document.title = item["summary"]
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = {
-                    "event_id": item['event_id'],
-                    "summary": item['summary'],
-                    "start_time": item['start_time'],
-                    "end_time": item['end_time'],
-                    "location": item['location'],
+                    "event_id": item["event_id"],
+                    "summary": item["summary"],
+                    "start_time": item["start_time"],
+                    "end_time": item["end_time"],
+                    "location": item["location"],
                     "connector_id": connector_id,
                     "source": "composio",
                 }
@@ -484,7 +499,9 @@ async def index_composio_google_calendar(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
 
diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py
index 26cfd3020..4ccd195e6 100644
--- a/surfsense_backend/app/connectors/composio_google_drive_connector.py
+++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py
@@ -938,13 +938,15 @@ async def _index_composio_drive_delta_sync(
 
             if existing_document:
                 # Queue existing document for update
-                files_to_process.append({
-                    'document': existing_document,
-                    'is_new': False,
-                    'file_id': file_id,
-                    'file_name': file_name,
-                    'mime_type': mime_type,
-                })
+                files_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "file_id": file_id,
+                        "file_name": file_name,
+                        "mime_type": mime_type,
+                    }
+                )
                 continue
 
             # Create new document with PENDING status
@@ -974,13 +976,15 @@ async def _index_composio_drive_delta_sync(
             session.add(document)
             new_documents_created = True
 
-            files_to_process.append({
-                'document': document,
-                'is_new': True,
-                'file_id': file_id,
-                'file_name': file_name,
-                'mime_type': mime_type,
-            })
+            files_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "mime_type": mime_type,
+                }
+            )
 
         except Exception as e:
             logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True)
@@ -989,7 +993,9 @@ async def _index_composio_drive_delta_sync(
 
     # Commit all pending documents - they all appear in UI now
     if new_documents_created:
-        logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
+        logger.info(
+            f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
+        )
         await session.commit()
 
     # =======================================================================
@@ -1005,7 +1011,7 @@ async def _index_composio_drive_delta_sync(
                 await on_heartbeat_callback(documents_indexed)
                 last_heartbeat_time = current_time
 
-        document = item['document']
+        document = item["document"]
         try:
             # Set to PROCESSING and commit
             document.status = DocumentStatus.processing()
@@ -1013,11 +1019,13 @@ async def _index_composio_drive_delta_sync(
 
             # Get file content
             content, content_error = await composio_connector.get_drive_file_content(
-                item['file_id'], original_mime_type=item['mime_type']
+                item["file_id"], original_mime_type=item["mime_type"]
             )
 
             if content_error or not content:
-                logger.warning(f"Could not get content for file {item['file_name']}: {content_error}")
+                logger.warning(
+                    f"Could not get content for file {item['file_name']}: {content_error}"
+                )
                 markdown_content = f"# {item['file_name']}\n\n"
                 markdown_content += f"**File ID:** {item['file_id']}\n"
                 markdown_content += f"**Type:** {item['mime_type']}\n"
@@ -1031,9 +1039,9 @@ async def _index_composio_drive_delta_sync(
             else:
                 markdown_content = await _process_file_content(
                     content=content,
-                    file_name=item['file_name'],
-                    file_id=item['file_id'],
-                    mime_type=item['mime_type'],
+                    file_name=item["file_name"],
+                    file_id=item["file_id"],
+                    mime_type=item["mime_type"],
                     search_space_id=search_space_id,
                     user_id=user_id,
                     session=session,
@@ -1045,14 +1053,14 @@ async def _index_composio_drive_delta_sync(
             content_hash = generate_content_hash(markdown_content, search_space_id)
 
             # For existing documents, check if content changed
-            if not item['is_new'] and document.content_hash == content_hash:
+            if not item["is_new"] and document.content_hash == content_hash:
                 if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
                     document.status = DocumentStatus.ready()
                 documents_skipped += 1
                 continue
 
             # Check for duplicate content hash (for new documents)
-            if item['is_new']:
+            if item["is_new"]:
                 with session.no_autoflush:
                     duplicate_by_content = await check_duplicate_document_by_hash(
                         session, content_hash
@@ -1067,13 +1075,15 @@ async def _index_composio_drive_delta_sync(
                     continue
 
             # Heavy processing (LLM, embeddings, chunks)
-            user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+            user_llm = await get_user_long_context_llm(
+                session, user_id, search_space_id
+            )
 
             if user_llm:
                 document_metadata_for_summary = {
-                    "file_id": item['file_id'],
-                    "file_name": item['file_name'],
-                    "mime_type": item['mime_type'],
+                    "file_id": item["file_id"],
+                    "file_name": item["file_name"],
+                    "mime_type": item["mime_type"],
                     "document_type": "Google Drive File (Composio)",
                 }
                 summary_content, summary_embedding = await generate_document_summary(
@@ -1081,20 +1091,22 @@ async def _index_composio_drive_delta_sync(
                 )
             else:
                 summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
-                summary_embedding = config.embedding_model_instance.embed(summary_content)
+                summary_embedding = config.embedding_model_instance.embed(
+                    summary_content
+                )
 
             chunks = await create_document_chunks(markdown_content)
 
             # Update document to READY
-            document.title = item['file_name']
+            document.title = item["file_name"]
             document.content = summary_content
             document.content_hash = content_hash
             document.embedding = summary_embedding
             document.document_metadata = {
-                "file_id": item['file_id'],
-                "file_name": item['file_name'],
-                "FILE_NAME": item['file_name'],
-                "mime_type": item['mime_type'],
+                "file_id": item["file_id"],
+                "file_name": item["file_name"],
+                "FILE_NAME": item["file_name"],
+                "mime_type": item["mime_type"],
                 "connector_id": connector_id,
                 "source": "composio",
             }
@@ -1117,7 +1129,9 @@ async def _index_composio_drive_delta_sync(
                 document.status = DocumentStatus.failed(str(e))
                 document.updated_at = get_current_timestamp()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
             documents_failed += 1
             continue
 
@@ -1329,13 +1343,15 @@ async def _index_composio_drive_full_scan(
 
             if existing_document:
                 # Queue existing document for update (will be set to processing in Phase 2)
-                files_to_process.append({
-                    'document': existing_document,
-                    'is_new': False,
-                    'file_id': file_id,
-                    'file_name': file_name,
-                    'mime_type': mime_type,
-                })
+                files_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "file_id": file_id,
+                        "file_name": file_name,
+                        "mime_type": mime_type,
+                    }
+                )
                 continue
 
             # Create new document with PENDING status (visible in UI immediately)
@@ -1365,13 +1381,15 @@ async def _index_composio_drive_full_scan(
             session.add(document)
             new_documents_created = True
 
-            files_to_process.append({
-                'document': document,
-                'is_new': True,
-                'file_id': file_id,
-                'file_name': file_name,
-                'mime_type': mime_type,
-            })
+            files_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "mime_type": mime_type,
+                }
+            )
 
         except Exception as e:
             logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True)
@@ -1380,7 +1398,9 @@ async def _index_composio_drive_full_scan(
 
     # Commit all pending documents - they all appear in UI now
     if new_documents_created:
-        logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
+        logger.info(
+            f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
+        )
         await session.commit()
 
     # =======================================================================
@@ -1397,7 +1417,7 @@ async def _index_composio_drive_full_scan(
                 await on_heartbeat_callback(documents_indexed)
                 last_heartbeat_time = current_time
 
-        document = item['document']
+        document = item["document"]
         try:
             # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
             document.status = DocumentStatus.processing()
@@ -1405,11 +1425,13 @@ async def _index_composio_drive_full_scan(
 
             # Get file content (pass mime_type for Google Workspace export handling)
             content, content_error = await composio_connector.get_drive_file_content(
-                item['file_id'], original_mime_type=item['mime_type']
+                item["file_id"], original_mime_type=item["mime_type"]
             )
 
             if content_error or not content:
-                logger.warning(f"Could not get content for file {item['file_name']}: {content_error}")
+                logger.warning(
+                    f"Could not get content for file {item['file_name']}: {content_error}"
+                )
                 markdown_content = f"# {item['file_name']}\n\n"
                 markdown_content += f"**File ID:** {item['file_id']}\n"
                 markdown_content += f"**Type:** {item['mime_type']}\n"
@@ -1424,9 +1446,9 @@ async def _index_composio_drive_full_scan(
                 # Process content based on file type
                 markdown_content = await _process_file_content(
                     content=content,
-                    file_name=item['file_name'],
-                    file_id=item['file_id'],
-                    mime_type=item['mime_type'],
+                    file_name=item["file_name"],
+                    file_id=item["file_id"],
+                    mime_type=item["mime_type"],
                     search_space_id=search_space_id,
                     user_id=user_id,
                     session=session,
@@ -1438,7 +1460,7 @@ async def _index_composio_drive_full_scan(
             content_hash = generate_content_hash(markdown_content, search_space_id)
 
             # For existing documents, check if content changed
-            if not item['is_new'] and document.content_hash == content_hash:
+            if not item["is_new"] and document.content_hash == content_hash:
                 # Ensure status is ready
                 if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
                     document.status = DocumentStatus.ready()
@@ -1446,7 +1468,7 @@ async def _index_composio_drive_full_scan(
                 continue
 
             # Check for duplicate content hash (for new documents)
-            if item['is_new']:
+            if item["is_new"]:
                 with session.no_autoflush:
                     duplicate_by_content = await check_duplicate_document_by_hash(
                         session, content_hash
@@ -1462,13 +1484,15 @@ async def _index_composio_drive_full_scan(
                     continue
 
             # Heavy processing (LLM, embeddings, chunks)
-            user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+            user_llm = await get_user_long_context_llm(
+                session, user_id, search_space_id
+            )
 
             if user_llm:
                 document_metadata_for_summary = {
-                    "file_id": item['file_id'],
-                    "file_name": item['file_name'],
-                    "mime_type": item['mime_type'],
+                    "file_id": item["file_id"],
+                    "file_name": item["file_name"],
+                    "mime_type": item["mime_type"],
                     "document_type": "Google Drive File (Composio)",
                 }
                 summary_content, summary_embedding = await generate_document_summary(
@@ -1476,20 +1500,22 @@ async def _index_composio_drive_full_scan(
                 )
             else:
                 summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
-                summary_embedding = config.embedding_model_instance.embed(summary_content)
+                summary_embedding = config.embedding_model_instance.embed(
+                    summary_content
+                )
 
             chunks = await create_document_chunks(markdown_content)
 
             # Update document to READY with actual content
-            document.title = item['file_name']
+            document.title = item["file_name"]
             document.content = summary_content
             document.content_hash = content_hash
             document.embedding = summary_embedding
             document.document_metadata = {
-                "file_id": item['file_id'],
-                "file_name": item['file_name'],
-                "FILE_NAME": item['file_name'],
-                "mime_type": item['mime_type'],
+                "file_id": item["file_id"],
+                "file_name": item["file_name"],
+                "FILE_NAME": item["file_name"],
+                "mime_type": item["mime_type"],
                 "connector_id": connector_id,
                 "source": "composio",
             }
@@ -1515,7 +1541,9 @@ async def _index_composio_drive_full_scan(
                 document.status = DocumentStatus.failed(str(e))
                 document.updated_at = get_current_timestamp()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
             documents_failed += 1
             continue
 
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index fb5c711ed..344d83f13 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -103,67 +103,70 @@ class PodcastStatus(str, Enum):
 class DocumentStatus:
     """
     Helper class for document processing status (stored as JSONB).
-    
+
     Status values:
     - {"state": "ready"} - Document is fully processed and searchable
     - {"state": "pending"} - Document is queued, waiting to be processed
     - {"state": "processing"} - Document is currently being processed (only 1 at a time)
     - {"state": "failed", "reason": "..."} - Processing failed with reason
-    
+
     Usage:
         document.status = DocumentStatus.pending()
         document.status = DocumentStatus.processing()
         document.status = DocumentStatus.ready()
         document.status = DocumentStatus.failed("LLM rate limit exceeded")
     """
-    
+
     # State constants
     READY = "ready"
     PENDING = "pending"
     PROCESSING = "processing"
     FAILED = "failed"
-    
+
     @staticmethod
     def ready() -> dict:
         """Return status dict for a ready/searchable document."""
         return {"state": DocumentStatus.READY}
-    
+
     @staticmethod
     def pending() -> dict:
         """Return status dict for a document waiting to be processed."""
         return {"state": DocumentStatus.PENDING}
-    
+
     @staticmethod
     def processing() -> dict:
         """Return status dict for a document being processed."""
         return {"state": DocumentStatus.PROCESSING}
-    
+
     @staticmethod
     def failed(reason: str, **extra_details) -> dict:
         """
         Return status dict for a failed document.
-        
+
         Args:
             reason: Human-readable failure reason
             **extra_details: Optional additional details (duplicate_of, error_code, etc.)
         """
-        status = {"state": DocumentStatus.FAILED, "reason": reason[:500]}  # Truncate long reasons
+        status = {
+            "state": DocumentStatus.FAILED,
+            "reason": reason[:500],  # Truncate long reasons
+        }
         if extra_details:
             status.update(extra_details)
         return status
-    
+
     @staticmethod
     def get_state(status: dict | None) -> str | None:
         """Extract state from status dict, returns None if invalid."""
         if status is None:
             return None
         return status.get("state") if isinstance(status, dict) else None
-    
+
     @staticmethod
     def is_state(status: dict | None, state: str) -> bool:
         """Check if status matches a given state."""
         return DocumentStatus.get_state(status) == state
-    
+
     @staticmethod
     def get_failure_reason(status: dict | None) -> str | None:
         """Extract failure reason from status dict."""
@@ -866,7 +869,7 @@ class Document(BaseModel, TimestampMixin):
         JSONB,
         nullable=False,
         default=DocumentStatus.ready,
-        server_default=text("'{\"state\": \"ready\"}'::jsonb"),
+        server_default=text('\'{"state": "ready"}\'::jsonb'),
         index=True,
     )
 
diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py
index 00c80dcb5..b20f8cd9c 100644
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@@ -114,11 +114,11 @@ async def create_documents_file_upload(
 ):
     """
     Upload files as documents with real-time status tracking.
-    
+
     Implements 2-phase document status updates for real-time UI feedback:
     - Phase 1: Create all documents with 'pending' status (visible in UI immediately via ElectricSQL)
     - Phase 2: Celery processes each file: pending → processing → ready/failed
-    
+
     Requires DOCUMENTS_CREATE permission.
     """
     from datetime import datetime
@@ -144,7 +144,9 @@ async def create_documents_file_upload(
             raise HTTPException(status_code=400, detail="No files provided")
 
         created_documents: list[Document] = []
-        files_to_process: list[tuple[Document, str, str]] = []  # (document, temp_path, filename)
+        files_to_process: list[
+            tuple[Document, str, str]
+        ] = []  # (document, temp_path, filename)
         skipped_duplicates = 0
 
         # ===== PHASE 1: Create pending documents for all files =====
@@ -201,7 +203,9 @@ async def create_documents_file_upload(
                 )
                 session.add(document)
                 created_documents.append(document)
-                files_to_process.append((document, temp_path, file.filename or "unknown"))
+                files_to_process.append(
+                    (document, temp_path, file.filename or "unknown")
+                )
 
             except Exception as e:
                 raise HTTPException(
@@ -348,15 +352,15 @@ async def read_documents(
             created_by_name = None
             if doc.created_by:
                 created_by_name = doc.created_by.display_name or doc.created_by.email
-            
+
             # Parse status from JSONB
             status_data = None
-            if hasattr(doc, 'status') and doc.status:
+            if hasattr(doc, "status") and doc.status:
                 status_data = DocumentStatusSchema(
                     state=doc.status.get("state", "ready"),
                     reason=doc.status.get("reason"),
                 )
-            
+
             api_documents.append(
                 DocumentRead(
                     id=doc.id,
@@ -503,15 +507,15 @@ async def search_documents(
             created_by_name = None
             if doc.created_by:
                 created_by_name = doc.created_by.display_name or doc.created_by.email
-            
+
             # Parse status from JSONB
             status_data = None
-            if hasattr(doc, 'status') and doc.status:
+            if hasattr(doc, "status") and doc.status:
                 status_data = DocumentStatusSchema(
                     state=doc.status.get("state", "ready"),
                     reason=doc.status.get("reason"),
                 )
-            
+
             api_documents.append(
                 DocumentRead(
                     id=doc.id,
diff --git a/surfsense_backend/app/schemas/documents.py b/surfsense_backend/app/schemas/documents.py
index 7d85d0229..4cedc7d93 100644
--- a/surfsense_backend/app/schemas/documents.py
+++ b/surfsense_backend/app/schemas/documents.py
@@ -43,6 +43,7 @@ class DocumentUpdate(DocumentBase):
 
 class DocumentStatusSchema(BaseModel):
     """Document processing status."""
+
     state: str  # "ready", "processing", "failed"
     reason: str | None = None
 
@@ -59,8 +60,12 @@ class DocumentRead(BaseModel):
     updated_at: datetime | None
     search_space_id: int
     created_by_id: UUID | None = None  # User who created/uploaded this document
-    created_by_name: str | None = None  # Display name or email of the user who created this document
-    status: DocumentStatusSchema | None = None  # Processing status (ready, processing, failed)
+    created_by_name: str | None = (
+        None  # Display name or email of the user who created this document
+    )
+    status: DocumentStatusSchema | None = (
+        None  # Processing status (ready, processing, failed)
+    )
 
     model_config = ConfigDict(from_attributes=True)
 
diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py
index 6967902d1..251241e96 100644
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@@ -1465,11 +1465,7 @@ class ConnectorService:
             issue_key = metadata.get("issue_key", "")
             issue_title = metadata.get("issue_title", "Untitled Issue")
             status = metadata.get("status", "")
-            title = (
-                f"{issue_key} - {issue_title}"
-                if issue_key
-                else issue_title
-            )
+            title = f"{issue_key} - {issue_title}" if issue_key else issue_title
             if status:
                 title += f" ({status})"
             return title
@@ -2387,11 +2383,7 @@ class ConnectorService:
         def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             event_name = metadata.get("event_name", "Untitled Event")
             start_time = metadata.get("start_time", "")
-            return (
-                f"{event_name} ({start_time})"
-                if start_time
-                else event_name
-            )
+            return f"{event_name} ({start_time})" if start_time else event_name
 
         def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             return metadata.get("event_url", "") or ""
diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
index cd5537927..6dfcbff46 100644
--- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
@@ -548,11 +548,11 @@ def process_file_upload_with_document_task(
 ):
     """
     Celery task to process uploaded file with existing pending document.
-    
+
     This task is used by the 2-phase document upload flow:
     - Phase 1 (API): Creates pending document (visible in UI immediately)
     - Phase 2 (this task): Updates document status: pending → processing → ready/failed
-    
+
     Args:
         document_id: ID of the pending document created in Phase 1
         temp_path: Path to the uploaded file
@@ -634,7 +634,7 @@ async def _process_file_with_document(
 ):
     """
     Process file and update existing pending document status.
-    
+
     This function implements Phase 2 of the 2-phase document upload:
     - Sets document status to 'processing' (shows spinner in UI)
     - Processes the file (parsing, embedding, chunking)
@@ -669,11 +669,15 @@ async def _process_file_with_document(
             file_size = os.path.getsize(temp_path)
             logger.info(f"[_process_file_with_document] File size: {file_size} bytes")
         except Exception as e:
-            logger.warning(f"[_process_file_with_document] Could not get file size: {e}")
+            logger.warning(
+                f"[_process_file_with_document] Could not get file size: {e}"
+            )
             file_size = None
 
         # Create notification for document processing
-        logger.info(f"[_process_file_with_document] Creating notification for: {filename}")
+        logger.info(
+            f"[_process_file_with_document] Creating notification for: {filename}"
+        )
         notification = (
             await NotificationService.document_processing.notify_processing_started(
                 session=session,
@@ -822,7 +826,9 @@ async def _process_file_with_document(
             if os.path.exists(temp_path):
                 try:
                     os.unlink(temp_path)
-                    logger.info(f"[_process_file_with_document] Cleaned up temp file: {temp_path}")
+                    logger.info(
+                        f"[_process_file_with_document] Cleaned up temp file: {temp_path}"
+                    )
                 except Exception as cleanup_error:
                     logger.warning(
                         f"[_process_file_with_document] Failed to clean up temp file: {cleanup_error}"
diff --git a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py
index ff7a11645..ef3a30e43 100644
--- a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py
+++ b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py
@@ -154,9 +154,7 @@ async def _cleanup_stale_notifications():
                 f"Found {len(stale_notification_ids)} stale connector indexing notifications "
                 f"(no Redis heartbeat key): {stale_notification_ids}"
             )
-            logger.info(
-                f"Connector IDs for document cleanup: {stale_connector_ids}"
-            )
+            logger.info(f"Connector IDs for document cleanup: {stale_connector_ids}")
 
             # O(1) Batch UPDATE notifications using JSONB || operator
             # This merges the update data into existing notification_metadata
diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
index 05a4007ae..46cd069c9 100644
--- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
@@ -140,7 +140,9 @@ async def index_airtable_records(
                     log_entry, success_msg, {"bases_count": 0}
                 )
                 # CRITICAL: Update timestamp even when no bases found so Electric SQL syncs
-                await update_connector_last_indexed(session, connector, update_last_indexed)
+                await update_connector_last_indexed(
+                    session, connector, update_last_indexed
+                )
                 await session.commit()
                 return 0, None  # Return None (not error) when no items found
 
@@ -277,22 +279,28 @@ async def index_airtable_records(
                                 # Document exists - check if content has changed
                                 if existing_document.content_hash == content_hash:
                                     # Ensure status is ready (might have been stuck in processing/pending)
-                                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
-                                        existing_document.status = DocumentStatus.ready()
+                                    if not DocumentStatus.is_state(
+                                        existing_document.status, DocumentStatus.READY
+                                    ):
+                                        existing_document.status = (
+                                            DocumentStatus.ready()
+                                        )
                                     documents_skipped += 1
                                     continue
 
                                 # Queue existing document for update (will be set to processing in Phase 2)
-                                records_to_process.append({
-                                    'document': existing_document,
-                                    'is_new': False,
-                                    'markdown_content': markdown_content,
-                                    'content_hash': content_hash,
-                                    'record_id': record_id,
-                                    'record': record,
-                                    'base_name': base_name,
-                                    'table_name': table_name,
-                                })
+                                records_to_process.append(
+                                    {
+                                        "document": existing_document,
+                                        "is_new": False,
+                                        "markdown_content": markdown_content,
+                                        "content_hash": content_hash,
+                                        "record_id": record_id,
+                                        "record": record,
+                                        "base_name": base_name,
+                                        "table_name": table_name,
+                                    }
+                                )
                                 continue
 
                             # Document doesn't exist by unique_identifier_hash
@@ -339,25 +347,31 @@ async def index_airtable_records(
                             session.add(document)
                             new_documents_created = True
 
-                            records_to_process.append({
-                                'document': document,
-                                'is_new': True,
-                                'markdown_content': markdown_content,
-                                'content_hash': content_hash,
-                                'record_id': record_id,
-                                'record': record,
-                                'base_name': base_name,
-                                'table_name': table_name,
-                            })
+                            records_to_process.append(
+                                {
+                                    "document": document,
+                                    "is_new": True,
+                                    "markdown_content": markdown_content,
+                                    "content_hash": content_hash,
+                                    "record_id": record_id,
+                                    "record": record,
+                                    "base_name": base_name,
+                                    "table_name": table_name,
+                                }
+                            )
 
                         except Exception as e:
-                            logger.error(f"Error in Phase 1 for record: {e!s}", exc_info=True)
+                            logger.error(
+                                f"Error in Phase 1 for record: {e!s}", exc_info=True
+                            )
                             documents_failed += 1
                             continue
 
             # Commit all pending documents - they all appear in UI now
             if new_documents_created:
-                logger.info(f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents")
+                logger.info(
+                    f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents"
+                )
                 await session.commit()
 
             # =======================================================================
@@ -374,7 +388,7 @@ async def index_airtable_records(
                         await on_heartbeat_callback(documents_indexed)
                         last_heartbeat_time = current_time
 
-                document = item['document']
+                document = item["document"]
                 try:
                     # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                     document.status = DocumentStatus.processing()
@@ -387,13 +401,18 @@ async def index_airtable_records(
 
                     if user_llm:
                         document_metadata_for_summary = {
-                            "record_id": item['record_id'],
-                            "created_time": item['record'].get("CREATED_TIME()", ""),
+                            "record_id": item["record_id"],
+                            "created_time": item["record"].get("CREATED_TIME()", ""),
                             "document_type": "Airtable Record",
                             "connector_type": "Airtable",
                         }
-                        summary_content, summary_embedding = await generate_document_summary(
-                            item['markdown_content'], user_llm, document_metadata_for_summary
+                        (
+                            summary_content,
+                            summary_embedding,
+                        ) = await generate_document_summary(
+                            item["markdown_content"],
+                            user_llm,
+                            document_metadata_for_summary,
                         )
                     else:
                         # Fallback to simple summary if no LLM configured
@@ -402,18 +421,18 @@ async def index_airtable_records(
                             summary_content
                         )
 
-                    chunks = await create_document_chunks(item['markdown_content'])
+                    chunks = await create_document_chunks(item["markdown_content"])
 
                     # Update document to READY with actual content
-                    document.title = item['record_id']
+                    document.title = item["record_id"]
                     document.content = summary_content
-                    document.content_hash = item['content_hash']
+                    document.content_hash = item["content_hash"]
                     document.embedding = summary_embedding
                     document.document_metadata = {
-                        "record_id": item['record_id'],
-                        "created_time": item['record'].get("CREATED_TIME()", ""),
-                        "base_name": item['base_name'],
-                        "table_name": item['table_name'],
+                        "record_id": item["record_id"],
+                        "created_time": item["record"].get("CREATED_TIME()", ""),
+                        "base_name": item["base_name"],
+                        "table_name": item["table_name"],
                         "connector_id": connector_id,
                     }
                     safe_set_chunks(document, chunks)
@@ -430,13 +449,17 @@ async def index_airtable_records(
                         await session.commit()
 
                 except Exception as e:
-                    logger.error(f"Error processing Airtable record: {e!s}", exc_info=True)
+                    logger.error(
+                        f"Error processing Airtable record: {e!s}", exc_info=True
+                    )
                     # Mark document as failed with reason (visible in UI)
                     try:
                         document.status = DocumentStatus.failed(str(e))
                         document.updated_at = get_current_timestamp()
                     except Exception as status_error:
-                        logger.error(f"Failed to update document status to failed: {status_error}")
+                        logger.error(
+                            f"Failed to update document status to failed: {status_error}"
+                        )
                     documents_failed += 1
                     continue
 
@@ -446,7 +469,9 @@ async def index_airtable_records(
             total_processed = documents_indexed
 
             # Final commit to ensure all documents are persisted (safety net)
-            logger.info(f"Final commit: Total {documents_indexed} Airtable records processed")
+            logger.info(
+                f"Final commit: Total {documents_indexed} Airtable records processed"
+            )
             try:
                 await session.commit()
                 logger.info(
diff --git a/surfsense_backend/app/tasks/connector_indexers/base.py b/surfsense_backend/app/tasks/connector_indexers/base.py
index b5b4e5559..da32e84a6 100644
--- a/surfsense_backend/app/tasks/connector_indexers/base.py
+++ b/surfsense_backend/app/tasks/connector_indexers/base.py
@@ -31,29 +31,30 @@ def get_current_timestamp() -> datetime:
 def safe_set_chunks(document: Document, chunks: list) -> None:
     """
     Safely assign chunks to a document without triggering lazy loading.
-    
+
     ALWAYS use this instead of `document.chunks = chunks` to avoid
     SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
-    
+
     Why this is needed:
     - Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
       load the OLD chunks first (for comparison/orphan detection)
     - This lazy loading fails in async context with asyncpg driver
     - set_committed_value bypasses this by setting the value directly
-    
+
     This function is safe regardless of how the document was loaded
     (with or without selectinload).
-    
+
     Args:
         document: The Document object to update
         chunks: List of Chunk objects to assign
-    
+
     Example:
         # Instead of: document.chunks = chunks (DANGEROUS!)
         safe_set_chunks(document, chunks)  # Always safe
     """
     from sqlalchemy.orm.attributes import set_committed_value
-    set_committed_value(document, 'chunks', chunks)
+
+    set_committed_value(document, "chunks", chunks)
 
 
 async def check_duplicate_document_by_hash(
diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
index fbf90b345..d60884539 100644
--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@@ -261,7 +261,9 @@ async def index_bookstack_pages(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         logger.info(
                             f"Document for BookStack page {page_name} unchanged. Skipping."
@@ -270,20 +272,22 @@ async def index_bookstack_pages(
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    pages_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'page_id': page_id,
-                        'page_name': page_name,
-                        'page_slug': page_slug,
-                        'book_id': book_id,
-                        'book_slug': book_slug,
-                        'chapter_id': chapter_id,
-                        'page_url': page_url,
-                        'page_content': page_content,
-                        'full_content': full_content,
-                        'content_hash': content_hash,
-                    })
+                    pages_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "page_id": page_id,
+                            "page_name": page_name,
+                            "page_slug": page_slug,
+                            "book_id": book_id,
+                            "book_slug": book_slug,
+                            "chapter_id": chapter_id,
+                            "page_url": page_url,
+                            "page_content": page_content,
+                            "full_content": full_content,
+                            "content_hash": content_hash,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -331,20 +335,22 @@ async def index_bookstack_pages(
                 session.add(document)
                 new_documents_created = True
 
-                pages_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'page_id': page_id,
-                    'page_name': page_name,
-                    'page_slug': page_slug,
-                    'book_id': book_id,
-                    'book_slug': book_slug,
-                    'chapter_id': chapter_id,
-                    'page_url': page_url,
-                    'page_content': page_content,
-                    'full_content': full_content,
-                    'content_hash': content_hash,
-                })
+                pages_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "page_id": page_id,
+                        "page_name": page_name,
+                        "page_slug": page_slug,
+                        "book_id": book_id,
+                        "book_slug": book_slug,
+                        "chapter_id": chapter_id,
+                        "page_url": page_url,
+                        "page_content": page_content,
+                        "full_content": full_content,
+                        "content_hash": content_hash,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
@@ -353,7 +359,9 @@ async def index_bookstack_pages(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -370,7 +378,7 @@ async def index_bookstack_pages(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -383,23 +391,23 @@ async def index_bookstack_pages(
 
                 # Build document metadata
                 doc_metadata = {
-                    "page_id": item['page_id'],
-                    "page_name": item['page_name'],
-                    "page_slug": item['page_slug'],
-                    "book_id": item['book_id'],
-                    "book_slug": item['book_slug'],
-                    "chapter_id": item['chapter_id'],
+                    "page_id": item["page_id"],
+                    "page_name": item["page_name"],
+                    "page_slug": item["page_slug"],
+                    "book_id": item["book_id"],
+                    "book_slug": item["book_slug"],
+                    "chapter_id": item["chapter_id"],
                     "base_url": bookstack_base_url,
-                    "page_url": item['page_url'],
+                    "page_url": item["page_url"],
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
 
                 if user_llm:
                     summary_metadata = {
-                        "page_name": item['page_name'],
-                        "page_id": item['page_id'],
-                        "book_id": item['book_id'],
+                        "page_name": item["page_name"],
+                        "page_id": item["page_id"],
+                        "book_id": item["book_id"],
                         "document_type": "BookStack Page",
                         "connector_type": "BookStack",
                     }
@@ -407,17 +415,15 @@ async def index_bookstack_pages(
                         summary_content,
                         summary_embedding,
                     ) = await generate_document_summary(
-                        item['full_content'], user_llm, summary_metadata
+                        item["full_content"], user_llm, summary_metadata
                     )
                 else:
                     # Fallback to simple summary if no LLM configured
-                    summary_content = (
-                        f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n"
-                    )
-                    if item['page_content']:
+                    summary_content = f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n"
+                    if item["page_content"]:
                         # Take first 1000 characters of content for summary
-                        content_preview = item['page_content'][:1000]
-                        if len(item['page_content']) > 1000:
+                        content_preview = item["page_content"][:1000]
+                        if len(item["page_content"]) > 1000:
                             content_preview += "..."
                         summary_content += f"Content Preview: {content_preview}\n\n"
                     summary_embedding = config.embedding_model_instance.embed(
@@ -425,12 +431,12 @@ async def index_bookstack_pages(
                     )
 
                 # Process chunks - using the full page content
-                chunks = await create_document_chunks(item['full_content'])
+                chunks = await create_document_chunks(item["full_content"])
 
                 # Update document to READY with actual content
-                document.title = item['page_name']
+                document.title = item["page_name"]
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = doc_metadata
                 safe_set_chunks(document, chunks)
@@ -456,7 +462,9 @@ async def index_bookstack_pages(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 skipped_pages.append(
                     f"{item.get('page_name', 'Unknown')} (processing error)"
                 )
@@ -473,7 +481,9 @@ async def index_bookstack_pages(
         )
         try:
             await session.commit()
-            logger.info("Successfully committed all BookStack document changes to database")
+            logger.info(
+                "Successfully committed all BookStack document changes to database"
+            )
         except Exception as e:
             # Handle any remaining integrity errors gracefully (race conditions, etc.)
             if (
diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
index 934e56744..47c5d8b3b 100644
--- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
@@ -260,7 +260,9 @@ async def index_clickup_tasks(
                         # Document exists - check if content has changed
                         if existing_document.content_hash == content_hash:
                             # Ensure status is ready (might have been stuck in processing/pending)
-                            if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            if not DocumentStatus.is_state(
+                                existing_document.status, DocumentStatus.READY
+                            ):
                                 existing_document.status = DocumentStatus.ready()
                             logger.info(
                                 f"Document for ClickUp task {task_name} unchanged. Skipping."
@@ -272,22 +274,24 @@ async def index_clickup_tasks(
                             logger.info(
                                 f"Content changed for ClickUp task {task_name}. Queuing for update."
                             )
-                            tasks_to_process.append({
-                                'document': existing_document,
-                                'is_new': False,
-                                'task_content': task_content,
-                                'content_hash': content_hash,
-                                'task_id': task_id,
-                                'task_name': task_name,
-                                'task_status': task_status,
-                                'task_priority': task_priority,
-                                'task_list_name': task_list_name,
-                                'task_space_name': task_space_name,
-                                'task_assignees': task_assignees,
-                                'task_due_date': task_due_date,
-                                'task_created': task_created,
-                                'task_updated': task_updated,
-                            })
+                            tasks_to_process.append(
+                                {
+                                    "document": existing_document,
+                                    "is_new": False,
+                                    "task_content": task_content,
+                                    "content_hash": content_hash,
+                                    "task_id": task_id,
+                                    "task_name": task_name,
+                                    "task_status": task_status,
+                                    "task_priority": task_priority,
+                                    "task_list_name": task_list_name,
+                                    "task_space_name": task_space_name,
+                                    "task_assignees": task_assignees,
+                                    "task_due_date": task_due_date,
+                                    "task_created": task_created,
+                                    "task_updated": task_updated,
+                                }
+                            )
                             continue
 
                     # Document doesn't exist by unique_identifier_hash
@@ -335,22 +339,24 @@ async def index_clickup_tasks(
                     session.add(document)
                     new_documents_created = True
 
-                    tasks_to_process.append({
-                        'document': document,
-                        'is_new': True,
-                        'task_content': task_content,
-                        'content_hash': content_hash,
-                        'task_id': task_id,
-                        'task_name': task_name,
-                        'task_status': task_status,
-                        'task_priority': task_priority,
-                        'task_list_name': task_list_name,
-                        'task_space_name': task_space_name,
-                        'task_assignees': task_assignees,
-                        'task_due_date': task_due_date,
-                        'task_created': task_created,
-                        'task_updated': task_updated,
-                    })
+                    tasks_to_process.append(
+                        {
+                            "document": document,
+                            "is_new": True,
+                            "task_content": task_content,
+                            "content_hash": content_hash,
+                            "task_id": task_id,
+                            "task_name": task_name,
+                            "task_status": task_status,
+                            "task_priority": task_priority,
+                            "task_list_name": task_list_name,
+                            "task_space_name": task_space_name,
+                            "task_assignees": task_assignees,
+                            "task_due_date": task_due_date,
+                            "task_created": task_created,
+                            "task_updated": task_updated,
+                        }
+                    )
 
                 except Exception as e:
                     logger.error(
@@ -362,7 +368,9 @@ async def index_clickup_tasks(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -379,7 +387,7 @@ async def index_clickup_tasks(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -392,13 +400,13 @@ async def index_clickup_tasks(
 
                 if user_llm:
                     document_metadata_for_summary = {
-                        "task_id": item['task_id'],
-                        "task_name": item['task_name'],
-                        "task_status": item['task_status'],
-                        "task_priority": item['task_priority'],
-                        "task_list": item['task_list_name'],
-                        "task_space": item['task_space_name'],
-                        "assignees": len(item['task_assignees']),
+                        "task_id": item["task_id"],
+                        "task_name": item["task_name"],
+                        "task_status": item["task_status"],
+                        "task_priority": item["task_priority"],
+                        "task_list": item["task_list_name"],
+                        "task_space": item["task_space_name"],
+                        "assignees": len(item["task_assignees"]),
                         "document_type": "ClickUp Task",
                         "connector_type": "ClickUp",
                     }
@@ -406,30 +414,30 @@ async def index_clickup_tasks(
                         summary_content,
                         summary_embedding,
                     ) = await generate_document_summary(
-                        item['task_content'], user_llm, document_metadata_for_summary
+                        item["task_content"], user_llm, document_metadata_for_summary
                     )
                 else:
-                    summary_content = item['task_content']
+                    summary_content = item["task_content"]
                     summary_embedding = config.embedding_model_instance.embed(
-                        item['task_content']
+                        item["task_content"]
                     )
 
-                chunks = await create_document_chunks(item['task_content'])
+                chunks = await create_document_chunks(item["task_content"])
 
                 # Update document to READY with actual content
-                document.title = item['task_name']
+                document.title = item["task_name"]
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = {
-                    "task_id": item['task_id'],
-                    "task_name": item['task_name'],
-                    "task_status": item['task_status'],
-                    "task_priority": item['task_priority'],
-                    "task_assignees": item['task_assignees'],
-                    "task_due_date": item['task_due_date'],
-                    "task_created": item['task_created'],
-                    "task_updated": item['task_updated'],
+                    "task_id": item["task_id"],
+                    "task_name": item["task_name"],
+                    "task_status": item["task_status"],
+                    "task_priority": item["task_priority"],
+                    "task_assignees": item["task_assignees"],
+                    "task_due_date": item["task_due_date"],
+                    "task_created": item["task_created"],
+                    "task_updated": item["task_updated"],
                     "connector_id": connector_id,
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                 }
@@ -456,7 +464,9 @@ async def index_clickup_tasks(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
index 7fd842996..a3a059d4e 100644
--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@@ -262,23 +262,27 @@ async def index_confluence_pages(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    pages_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'full_content': full_content,
-                        'page_content': page_content,
-                        'content_hash': content_hash,
-                        'page_id': page_id,
-                        'page_title': page_title,
-                        'space_id': space_id,
-                        'comment_count': comment_count,
-                    })
+                    pages_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "full_content": full_content,
+                            "page_content": page_content,
+                            "content_hash": content_hash,
+                            "page_id": page_id,
+                            "page_title": page_title,
+                            "space_id": space_id,
+                            "comment_count": comment_count,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -323,17 +327,19 @@ async def index_confluence_pages(
                 session.add(document)
                 new_documents_created = True
 
-                pages_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'full_content': full_content,
-                    'page_content': page_content,
-                    'content_hash': content_hash,
-                    'page_id': page_id,
-                    'page_title': page_title,
-                    'space_id': space_id,
-                    'comment_count': comment_count,
-                })
+                pages_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "full_content": full_content,
+                        "page_content": page_content,
+                        "content_hash": content_hash,
+                        "page_id": page_id,
+                        "page_title": page_title,
+                        "space_id": space_id,
+                        "comment_count": comment_count,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
@@ -342,7 +348,9 @@ async def index_confluence_pages(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -359,7 +367,7 @@ async def index_confluence_pages(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -372,10 +380,10 @@ async def index_confluence_pages(
 
                 if user_llm:
                     document_metadata = {
-                        "page_title": item['page_title'],
-                        "page_id": item['page_id'],
-                        "space_id": item['space_id'],
-                        "comment_count": item['comment_count'],
+                        "page_title": item["page_title"],
+                        "page_id": item["page_id"],
+                        "space_id": item["space_id"],
+                        "comment_count": item["comment_count"],
                         "document_type": "Confluence Page",
                         "connector_type": "Confluence",
                     }
@@ -383,17 +391,15 @@ async def index_confluence_pages(
                         summary_content,
                         summary_embedding,
                     ) = await generate_document_summary(
-                        item['full_content'], user_llm, document_metadata
+                        item["full_content"], user_llm, document_metadata
                     )
                 else:
                     # Fallback to simple summary if no LLM configured
-                    summary_content = (
-                        f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n"
-                    )
-                    if item['page_content']:
+                    summary_content = f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n"
+                    if item["page_content"]:
                         # Take first 1000 characters of content for summary
-                        content_preview = item['page_content'][:1000]
-                        if len(item['page_content']) > 1000:
+                        content_preview = item["page_content"][:1000]
+                        if len(item["page_content"]) > 1000:
                             content_preview += "..."
                         summary_content += f"Content Preview: {content_preview}\n\n"
                     summary_content += f"Comments: {item['comment_count']}"
@@ -402,18 +408,18 @@ async def index_confluence_pages(
                     )
 
                 # Process chunks - using the full page content with comments
-                chunks = await create_document_chunks(item['full_content'])
+                chunks = await create_document_chunks(item["full_content"])
 
                 # Update document to READY with actual content
-                document.title = item['page_title']
+                document.title = item["page_title"]
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = {
-                    "page_id": item['page_id'],
-                    "page_title": item['page_title'],
-                    "space_id": item['space_id'],
-                    "comment_count": item['comment_count'],
+                    "page_id": item["page_id"],
+                    "page_title": item["page_title"],
+                    "space_id": item["space_id"],
+                    "comment_count": item["comment_count"],
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
@@ -440,7 +446,9 @@ async def index_confluence_pages(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue  # Skip this page and continue with others
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
index e5f333531..1595897a0 100644
--- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
@@ -352,9 +352,7 @@ async def index_discord_messages(
             try:
                 channels = await discord_client.get_text_channels(guild_id)
                 if not channels:
-                    logger.info(
-                        f"No channels found in guild {guild_name}. Skipping."
-                    )
+                    logger.info(f"No channels found in guild {guild_name}. Skipping.")
                     skipped_channels.append(f"{guild_name} (no channels)")
                 else:
                     for channel in channels:
@@ -456,25 +454,31 @@ async def index_discord_messages(
                                 # Document exists - check if content has changed
                                 if existing_document.content_hash == content_hash:
                                     # Ensure status is ready (might have been stuck in processing/pending)
-                                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
-                                        existing_document.status = DocumentStatus.ready()
+                                    if not DocumentStatus.is_state(
+                                        existing_document.status, DocumentStatus.READY
+                                    ):
+                                        existing_document.status = (
+                                            DocumentStatus.ready()
+                                        )
                                     documents_skipped += 1
                                     continue
 
                                 # Queue existing document for update (will be set to processing in Phase 2)
-                                messages_to_process.append({
-                                    'document': existing_document,
-                                    'is_new': False,
-                                    'combined_document_string': combined_document_string,
-                                    'content_hash': content_hash,
-                                    'guild_name': guild_name,
-                                    'guild_id': guild_id,
-                                    'channel_name': channel_name,
-                                    'channel_id': channel_id,
-                                    'message_id': msg_id,
-                                    'message_timestamp': msg_timestamp,
-                                    'message_user_name': msg_user_name,
-                                })
+                                messages_to_process.append(
+                                    {
+                                        "document": existing_document,
+                                        "is_new": False,
+                                        "combined_document_string": combined_document_string,
+                                        "content_hash": content_hash,
+                                        "guild_name": guild_name,
+                                        "guild_id": guild_id,
+                                        "channel_name": channel_name,
+                                        "channel_id": channel_id,
+                                        "message_id": msg_id,
+                                        "message_timestamp": msg_timestamp,
+                                        "message_user_name": msg_user_name,
+                                    }
+                                )
                                 continue
 
                             # Document doesn't exist by unique_identifier_hash
@@ -522,19 +526,21 @@ async def index_discord_messages(
                             session.add(document)
                             new_documents_created = True
 
-                            messages_to_process.append({
-                                'document': document,
-                                'is_new': True,
-                                'combined_document_string': combined_document_string,
-                                'content_hash': content_hash,
-                                'guild_name': guild_name,
-                                'guild_id': guild_id,
-                                'channel_name': channel_name,
-                                'channel_id': channel_id,
-                                'message_id': msg_id,
-                                'message_timestamp': msg_timestamp,
-                                'message_user_name': msg_user_name,
-                            })
+                            messages_to_process.append(
+                                {
+                                    "document": document,
+                                    "is_new": True,
+                                    "combined_document_string": combined_document_string,
+                                    "content_hash": content_hash,
+                                    "guild_name": guild_name,
+                                    "guild_id": guild_id,
+                                    "channel_name": channel_name,
+                                    "channel_id": channel_id,
+                                    "message_id": msg_id,
+                                    "message_timestamp": msg_timestamp,
+                                    "message_user_name": msg_user_name,
+                                }
+                            )
 
             except Exception as e:
                 logger.error(
@@ -547,7 +553,9 @@ async def index_discord_messages(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -564,31 +572,31 @@ async def index_discord_messages(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
                 await session.commit()
 
                 # Heavy processing (embeddings, chunks)
-                chunks = await create_document_chunks(item['combined_document_string'])
+                chunks = await create_document_chunks(item["combined_document_string"])
                 doc_embedding = config.embedding_model_instance.embed(
-                    item['combined_document_string']
+                    item["combined_document_string"]
                 )
 
                 # Update document to READY with actual content
                 document.title = f"{item['guild_name']}#{item['channel_name']}"
-                document.content = item['combined_document_string']
-                document.content_hash = item['content_hash']
+                document.content = item["combined_document_string"]
+                document.content_hash = item["content_hash"]
                 document.embedding = doc_embedding
                 document.document_metadata = {
-                    "guild_name": item['guild_name'],
-                    "guild_id": item['guild_id'],
-                    "channel_name": item['channel_name'],
-                    "channel_id": item['channel_id'],
-                    "message_id": item['message_id'],
-                    "message_timestamp": item['message_timestamp'],
-                    "message_user_name": item['message_user_name'],
+                    "guild_name": item["guild_name"],
+                    "guild_id": item["guild_id"],
+                    "channel_name": item["channel_name"],
+                    "channel_id": item["channel_id"],
+                    "message_id": item["message_id"],
+                    "message_timestamp": item["message_timestamp"],
+                    "message_user_name": item["message_user_name"],
                     "indexed_at": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
@@ -612,7 +620,9 @@ async def index_discord_messages(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
index 97cd31a09..212afff39 100644
--- a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
@@ -253,7 +253,9 @@ async def index_elasticsearch_documents(
                         # If content is unchanged, skip. Otherwise queue for update.
                         if existing_doc.content_hash == content_hash:
                             # Ensure status is ready (might have been stuck in processing/pending)
-                            if not DocumentStatus.is_state(existing_doc.status, DocumentStatus.READY):
+                            if not DocumentStatus.is_state(
+                                existing_doc.status, DocumentStatus.READY
+                            ):
                                 existing_doc.status = DocumentStatus.ready()
                             logger.info(
                                 f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})"
@@ -262,17 +264,19 @@ async def index_elasticsearch_documents(
                             continue
 
                         # Queue existing document for update (will be set to processing in Phase 2)
-                        docs_to_process.append({
-                            'document': existing_doc,
-                            'is_new': False,
-                            'doc_id': doc_id,
-                            'title': title,
-                            'content': content,
-                            'content_hash': content_hash,
-                            'unique_identifier_hash': unique_identifier_hash,
-                            'hit': hit,
-                            'source': source,
-                        })
+                        docs_to_process.append(
+                            {
+                                "document": existing_doc,
+                                "is_new": False,
+                                "doc_id": doc_id,
+                                "title": title,
+                                "content": content,
+                                "content_hash": content_hash,
+                                "unique_identifier_hash": unique_identifier_hash,
+                                "hit": hit,
+                                "source": source,
+                            }
+                        )
                         hits_collected += 1
                         continue
 
@@ -310,17 +314,19 @@ async def index_elasticsearch_documents(
                     session.add(document)
                     new_documents_created = True
 
-                    docs_to_process.append({
-                        'document': document,
-                        'is_new': True,
-                        'doc_id': doc_id,
-                        'title': title,
-                        'content': content,
-                        'content_hash': content_hash,
-                        'unique_identifier_hash': unique_identifier_hash,
-                        'hit': hit,
-                        'source': source,
-                    })
+                    docs_to_process.append(
+                        {
+                            "document": document,
+                            "is_new": True,
+                            "doc_id": doc_id,
+                            "title": title,
+                            "content": content,
+                            "content_hash": content_hash,
+                            "unique_identifier_hash": unique_identifier_hash,
+                            "hit": hit,
+                            "source": source,
+                        }
+                    )
                     hits_collected += 1
 
                 except Exception as e:
@@ -330,7 +336,9 @@ async def index_elasticsearch_documents(
 
             # Commit all pending documents - they all appear in UI now
             if new_documents_created:
-                logger.info(f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents")
+                logger.info(
+                    f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents"
+                )
                 await session.commit()
 
             # =======================================================================
@@ -347,7 +355,7 @@ async def index_elasticsearch_documents(
                         await on_heartbeat_callback(documents_processed)
                         last_heartbeat_time = current_time
 
-                document = item['document']
+                document = item["document"]
                 try:
                     # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                     document.status = DocumentStatus.processing()
@@ -355,9 +363,9 @@ async def index_elasticsearch_documents(
 
                     # Build metadata
                     metadata = {
-                        "elasticsearch_id": item['doc_id'],
-                        "elasticsearch_index": item['hit'].get("_index", index_name),
-                        "elasticsearch_score": item['hit'].get("_score"),
+                        "elasticsearch_id": item["doc_id"],
+                        "elasticsearch_index": item["hit"].get("_index", index_name),
+                        "elasticsearch_score": item["hit"].get("_score"),
                         "indexed_at": datetime.now().isoformat(),
                         "source": "ELASTICSEARCH_CONNECTOR",
                         "connector_id": connector_id,
@@ -366,17 +374,17 @@ async def index_elasticsearch_documents(
                     # Add any additional metadata fields specified in config
                     if "ELASTICSEARCH_METADATA_FIELDS" in config:
                         for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
-                            if field in item['source']:
-                                metadata[f"es_{field}"] = item['source'][field]
+                            if field in item["source"]:
+                                metadata[f"es_{field}"] = item["source"][field]
 
                     # Create chunks
-                    chunks = await create_document_chunks(item['content'])
+                    chunks = await create_document_chunks(item["content"])
 
                     # Update document to READY with actual content
-                    document.title = item['title']
-                    document.content = item['content']
-                    document.content_hash = item['content_hash']
-                    document.unique_identifier_hash = item['unique_identifier_hash']
+                    document.title = item["title"]
+                    document.content = item["content"]
+                    document.content_hash = item["content_hash"]
+                    document.unique_identifier_hash = item["unique_identifier_hash"]
                     document.document_metadata = metadata
                     safe_set_chunks(document, chunks)
                     document.updated_at = get_current_timestamp()
@@ -399,7 +407,9 @@ async def index_elasticsearch_documents(
                         document.status = DocumentStatus.failed(str(e))
                         document.updated_at = get_current_timestamp()
                     except Exception as status_error:
-                        logger.error(f"Failed to update document status to failed: {status_error}")
+                        logger.error(
+                            f"Failed to update document status to failed: {status_error}"
+                        )
                     documents_failed += 1
                     continue
 
@@ -411,10 +421,14 @@ async def index_elasticsearch_documents(
                 )
 
             # Final commit for any remaining documents not yet committed in batches
-            logger.info(f"Final commit: Total {documents_processed} Elasticsearch documents processed")
+            logger.info(
+                f"Final commit: Total {documents_processed} Elasticsearch documents processed"
+            )
             try:
                 await session.commit()
-                logger.info("Successfully committed all Elasticsearch document changes to database")
+                logger.info(
+                    "Successfully committed all Elasticsearch document changes to database"
+                )
             except Exception as e:
                 # Handle any remaining integrity errors gracefully (race conditions, etc.)
                 if (
diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
index b37989a84..e1a1ddd4d 100644
--- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
@@ -17,7 +17,7 @@ from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import config
-from app.connectors.github_connector import GitHubConnector, RepositoryDigest
+from app.connectors.github_connector import GitHubConnector
 from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
 from app.services.llm_service import get_user_long_context_llm
 from app.services.task_logging_service import TaskLoggingService
@@ -237,7 +237,9 @@ async def index_github_repos(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
                         documents_skipped += 1
@@ -247,14 +249,16 @@ async def index_github_repos(
                     logger.info(
                         f"Content changed for repository {repo_full_name}. Queuing for update."
                     )
-                    repos_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'digest': digest,
-                        'content_hash': content_hash,
-                        'repo_full_name': repo_full_name,
-                        'unique_identifier_hash': unique_identifier_hash,
-                    })
+                    repos_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "digest": digest,
+                            "content_hash": content_hash,
+                            "repo_full_name": repo_full_name,
+                            "unique_identifier_hash": unique_identifier_hash,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -298,14 +302,16 @@ async def index_github_repos(
                 session.add(document)
                 new_documents_created = True
 
-                repos_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'digest': digest,
-                    'content_hash': content_hash,
-                    'repo_full_name': repo_full_name,
-                    'unique_identifier_hash': unique_identifier_hash,
-                })
+                repos_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "digest": digest,
+                        "content_hash": content_hash,
+                        "repo_full_name": repo_full_name,
+                        "unique_identifier_hash": unique_identifier_hash,
+                    }
+                )
 
             except Exception as repo_err:
                 logger.error(
@@ -317,7 +323,9 @@ async def index_github_repos(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -334,9 +342,9 @@ async def index_github_repos(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
-            digest = item['digest']
-            repo_full_name = item['repo_full_name']
+            document = item["document"]
+            digest = item["digest"]
+            repo_full_name = item["repo_full_name"]
 
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
@@ -353,7 +361,9 @@ async def index_github_repos(
                     "document_type": "GitHub Repository",
                     "connector_type": "GitHub",
                     "ingestion_method": "gitingest",
-                    "file_tree": digest.tree[:2000] if len(digest.tree) > 2000 else digest.tree,
+                    "file_tree": digest.tree[:2000]
+                    if len(digest.tree) > 2000
+                    else digest.tree,
                     "estimated_tokens": digest.estimated_tokens,
                 }
 
@@ -377,13 +387,17 @@ async def index_github_repos(
                         f"## Summary\n{digest.summary}\n\n"
                         f"## File Structure\n{digest.tree[:3000]}"
                     )
-                    summary_embedding = config.embedding_model_instance.embed(summary_text)
+                    summary_embedding = config.embedding_model_instance.embed(
+                        summary_text
+                    )
 
                 # Chunk the full digest content for granular search
                 try:
                     chunks_data = await create_document_chunks(digest.content)
                 except Exception as chunk_err:
-                    logger.error(f"Failed to chunk repository {repo_full_name}: {chunk_err}")
+                    logger.error(
+                        f"Failed to chunk repository {repo_full_name}: {chunk_err}"
+                    )
                     chunks_data = await _simple_chunk_content(digest.content)
 
                 # Update document to READY with actual content
@@ -401,7 +415,7 @@ async def index_github_repos(
 
                 document.title = repo_full_name
                 document.content = summary_text
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = doc_metadata
                 safe_set_chunks(document, chunks_data)
@@ -433,7 +447,9 @@ async def index_github_repos(
                     document.status = DocumentStatus.failed(str(repo_err))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 errors.append(f"Failed processing {repo_full_name}: {repo_err}")
                 documents_failed += 1
                 continue
@@ -442,7 +458,9 @@ async def index_github_repos(
         await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit
-        logger.info(f"Final commit: Total {documents_processed} GitHub repositories processed")
+        logger.info(
+            f"Final commit: Total {documents_processed} GitHub repositories processed"
+        )
         try:
             await session.commit()
             logger.info(
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
index ad749e61c..822e58d36 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@@ -345,25 +345,29 @@ async def index_google_calendar_events(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    events_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'event_markdown': event_markdown,
-                        'content_hash': content_hash,
-                        'event_id': event_id,
-                        'event_summary': event_summary,
-                        'calendar_id': calendar_id,
-                        'start_time': start_time,
-                        'end_time': end_time,
-                        'location': location,
-                        'description': description,
-                    })
+                    events_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "event_markdown": event_markdown,
+                            "content_hash": content_hash,
+                            "event_id": event_id,
+                            "event_summary": event_summary,
+                            "calendar_id": calendar_id,
+                            "start_time": start_time,
+                            "end_time": end_time,
+                            "location": location,
+                            "description": description,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -411,19 +415,21 @@ async def index_google_calendar_events(
                 session.add(document)
                 new_documents_created = True
 
-                events_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'event_markdown': event_markdown,
-                    'content_hash': content_hash,
-                    'event_id': event_id,
-                    'event_summary': event_summary,
-                    'calendar_id': calendar_id,
-                    'start_time': start_time,
-                    'end_time': end_time,
-                    'location': location,
-                    'description': description,
-                })
+                events_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "event_markdown": event_markdown,
+                        "content_hash": content_hash,
+                        "event_id": event_id,
+                        "event_summary": event_summary,
+                        "calendar_id": calendar_id,
+                        "start_time": start_time,
+                        "end_time": end_time,
+                        "location": location,
+                        "description": description,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@@ -432,7 +438,9 @@ async def index_google_calendar_events(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -449,7 +457,7 @@ async def index_google_calendar_events(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -462,48 +470,53 @@ async def index_google_calendar_events(
 
                 if user_llm:
                     document_metadata_for_summary = {
-                        "event_id": item['event_id'],
-                        "event_summary": item['event_summary'],
-                        "calendar_id": item['calendar_id'],
-                        "start_time": item['start_time'],
-                        "end_time": item['end_time'],
-                        "location": item['location'] or "No location",
+                        "event_id": item["event_id"],
+                        "event_summary": item["event_summary"],
+                        "calendar_id": item["calendar_id"],
+                        "start_time": item["start_time"],
+                        "end_time": item["end_time"],
+                        "location": item["location"] or "No location",
                         "document_type": "Google Calendar Event",
                         "connector_type": "Google Calendar",
                     }
-                    summary_content, summary_embedding = await generate_document_summary(
-                        item['event_markdown'], user_llm, document_metadata_for_summary
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["event_markdown"], user_llm, document_metadata_for_summary
                     )
                 else:
-                    summary_content = f"Google Calendar Event: {item['event_summary']}\n\n"
+                    summary_content = (
+                        f"Google Calendar Event: {item['event_summary']}\n\n"
+                    )
                     summary_content += f"Calendar: {item['calendar_id']}\n"
                     summary_content += f"Start: {item['start_time']}\n"
                     summary_content += f"End: {item['end_time']}\n"
-                    if item['location']:
+                    if item["location"]:
                         summary_content += f"Location: {item['location']}\n"
-                    if item['description']:
-                        desc_preview = item['description'][:1000]
-                        if len(item['description']) > 1000:
+                    if item["description"]:
+                        desc_preview = item["description"][:1000]
+                        if len(item["description"]) > 1000:
                             desc_preview += "..."
                         summary_content += f"Description: {desc_preview}\n"
                     summary_embedding = config.embedding_model_instance.embed(
                         summary_content
                     )
 
-                chunks = await create_document_chunks(item['event_markdown'])
+                chunks = await create_document_chunks(item["event_markdown"])
 
                 # Update document to READY with actual content
-                document.title = item['event_summary']
+                document.title = item["event_summary"]
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = {
-                    "event_id": item['event_id'],
-                    "event_summary": item['event_summary'],
-                    "calendar_id": item['calendar_id'],
-                    "start_time": item['start_time'],
-                    "end_time": item['end_time'],
-                    "location": item['location'],
+                    "event_id": item["event_id"],
+                    "event_summary": item["event_summary"],
+                    "calendar_id": item["calendar_id"],
+                    "start_time": item["start_time"],
+                    "end_time": item["end_time"],
+                    "location": item["location"],
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
@@ -527,7 +540,9 @@ async def index_google_calendar_events(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
index 8eae35d00..f7624cffe 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py
@@ -435,7 +435,7 @@ async def _index_full_scan(
     on_heartbeat_callback: HeartbeatCallbackType | None = None,
 ) -> tuple[int, int]:
     """Perform full scan indexing of a folder.
-    
+
     Implements 2-phase document status updates for real-time UI feedback:
     - Phase 1: Collect all files and create pending documents (visible in UI immediately)
     - Phase 2: Process each file: pending → processing → ready/failed
@@ -533,7 +533,9 @@ async def _index_full_scan(
 
     # Commit all pending documents - they all appear in UI now
     if new_documents_created:
-        logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents")
+        logger.info(
+            f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents"
+        )
         await session.commit()
 
     # =======================================================================
@@ -568,9 +570,7 @@ async def _index_full_scan(
 
         if documents_indexed % 10 == 0 and documents_indexed > 0:
             await session.commit()
-            logger.info(
-                f"Committed batch: {documents_indexed} files indexed so far"
-            )
+            logger.info(f"Committed batch: {documents_indexed} files indexed so far")
 
     logger.info(
         f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed"
@@ -597,7 +597,7 @@ async def _index_with_delta_sync(
 
     Note: include_subfolders is accepted for API consistency but delta sync
     automatically tracks changes across all folders including subfolders.
-    
+
     Implements 2-phase document status updates for real-time UI feedback:
     - Phase 1: Collect all changes and create pending documents (visible in UI immediately)
     - Phase 2: Process each file: pending → processing → ready/failed
@@ -676,7 +676,7 @@ async def _index_with_delta_sync(
 
     # Commit all pending documents - they all appear in UI now
     if new_documents_created:
-        logger.info(f"Phase 1: Committing pending documents")
+        logger.info("Phase 1: Committing pending documents")
         await session.commit()
 
     # =======================================================================
@@ -685,7 +685,7 @@ async def _index_with_delta_sync(
     # =======================================================================
     logger.info(f"Phase 2: Processing {len(changes_to_process)} changes")
 
-    for change, file, pending_doc in changes_to_process:
+    for _, file, pending_doc in changes_to_process:
         # Check if it's time for a heartbeat update
         if on_heartbeat_callback:
             current_time = time.time()
@@ -728,17 +728,17 @@ async def _create_pending_document_for_file(
 ) -> tuple[Document | None, bool]:
     """
     Create a pending document for a Google Drive file if it doesn't exist.
-    
+
     This is Phase 1 of the 2-phase document status update pattern.
     Creates documents with 'pending' status so they appear in UI immediately.
-    
+
     Args:
         session: Database session
         file: File metadata from Google Drive API
         connector_id: ID of the Drive connector
         search_space_id: ID of the search space
         user_id: ID of the user
-        
+
     Returns:
         Tuple of (document, should_skip):
         - (existing_doc, False): Existing document that needs update
@@ -746,28 +746,28 @@ async def _create_pending_document_for_file(
         - (None, True): File should be skipped (unchanged, rename-only, or folder)
     """
     from app.connectors.google_drive.file_types import should_skip_file
-    
+
     file_id = file.get("id")
     file_name = file.get("name", "Unknown")
     mime_type = file.get("mimeType", "")
-    
+
     # Skip folders and shortcuts
     if should_skip_file(mime_type):
         return None, True
-    
+
     if not file_id:
         return None, True
-    
+
     # Generate unique identifier hash for this file
     unique_identifier_hash = generate_unique_identifier_hash(
         DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
     )
-    
+
     # Check if document exists
     existing_document = await check_document_by_unique_identifier(
         session, unique_identifier_hash
     )
-    
+
     if existing_document:
         # Check if this is a rename-only update (content unchanged)
         incoming_md5 = file.get("md5Checksum")
@@ -775,7 +775,7 @@ async def _create_pending_document_for_file(
         doc_metadata = existing_document.document_metadata or {}
         stored_md5 = doc_metadata.get("md5_checksum")
         stored_modified_time = doc_metadata.get("modified_time")
-        
+
         # Determine if content changed
         content_unchanged = False
         if incoming_md5 and stored_md5:
@@ -783,16 +783,18 @@ async def _create_pending_document_for_file(
         elif not incoming_md5 and incoming_modified_time and stored_modified_time:
             # Google Workspace file - use modifiedTime as fallback
             content_unchanged = incoming_modified_time == stored_modified_time
-        
+
         if content_unchanged:
             # Ensure status is ready (might have been stuck in processing/pending)
-            if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+            if not DocumentStatus.is_state(
+                existing_document.status, DocumentStatus.READY
+            ):
                 existing_document.status = DocumentStatus.ready()
             return None, True
-        
+
         # Content changed - return existing document for update
         return existing_document, False
-    
+
     # Create new pending document
     document = Document(
         search_space_id=search_space_id,
@@ -815,7 +817,7 @@ async def _create_pending_document_for_file(
         connector_id=connector_id,
     )
     session.add(document)
-    
+
     return document, False
 
 
@@ -958,7 +960,7 @@ async def _process_single_file(
 ) -> tuple[int, int, int]:
     """
     Process a single file by downloading and using Surfsense's file processor.
-    
+
     Implements Phase 2 of the 2-phase document status update pattern.
     Updates document status: pending → processing → ready/failed
 
@@ -1042,12 +1044,13 @@ async def _process_single_file(
             processed_doc = await check_document_by_unique_identifier(
                 session, unique_identifier_hash
             )
-            if processed_doc:
-                # Ensure status is READY
-                if not DocumentStatus.is_state(processed_doc.status, DocumentStatus.READY):
-                    processed_doc.status = DocumentStatus.ready()
-                    processed_doc.updated_at = get_current_timestamp()
-                    await session.commit()
+            # Ensure status is READY
+            if processed_doc and not DocumentStatus.is_state(
+                processed_doc.status, DocumentStatus.READY
+            ):
+                processed_doc.status = DocumentStatus.ready()
+                processed_doc.updated_at = get_current_timestamp()
+                await session.commit()
 
         logger.info(f"Successfully indexed Google Drive file: {file_name}")
         return 1, 0, 0
@@ -1061,7 +1064,9 @@ async def _process_single_file(
                 pending_document.updated_at = get_current_timestamp()
                 await session.commit()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
         return 0, 0, 1
 
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
index 89e8796d3..c7caee4da 100644
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@@ -228,7 +228,9 @@ async def index_google_gmail_messages(
         documents_indexed = 0
         documents_skipped = 0
         documents_failed = 0  # Track messages that failed processing
-        duplicate_content_count = 0  # Track messages skipped due to duplicate content_hash
+        duplicate_content_count = (
+            0  # Track messages skipped due to duplicate content_hash
+        )
 
         # Heartbeat tracking - update notification periodically to prevent appearing stuck
         last_heartbeat_time = time.time()
@@ -294,23 +296,27 @@ async def index_google_gmail_messages(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    messages_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'markdown_content': markdown_content,
-                        'content_hash': content_hash,
-                        'message_id': message_id,
-                        'thread_id': thread_id,
-                        'subject': subject,
-                        'sender': sender,
-                        'date_str': date_str,
-                    })
+                    messages_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "markdown_content": markdown_content,
+                            "content_hash": content_hash,
+                            "message_id": message_id,
+                            "thread_id": thread_id,
+                            "subject": subject,
+                            "sender": sender,
+                            "date_str": date_str,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -356,17 +362,19 @@ async def index_google_gmail_messages(
                 session.add(document)
                 new_documents_created = True
 
-                messages_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'markdown_content': markdown_content,
-                    'content_hash': content_hash,
-                    'message_id': message_id,
-                    'thread_id': thread_id,
-                    'subject': subject,
-                    'sender': sender,
-                    'date_str': date_str,
-                })
+                messages_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "message_id": message_id,
+                        "thread_id": thread_id,
+                        "subject": subject,
+                        "sender": sender,
+                        "date_str": date_str,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
@@ -375,7 +383,9 @@ async def index_google_gmail_messages(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -392,7 +402,7 @@ async def index_google_gmail_messages(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -405,16 +415,21 @@ async def index_google_gmail_messages(
 
                 if user_llm:
                     document_metadata_for_summary = {
-                        "message_id": item['message_id'],
-                        "thread_id": item['thread_id'],
-                        "subject": item['subject'],
-                        "sender": item['sender'],
-                        "date": item['date_str'],
+                        "message_id": item["message_id"],
+                        "thread_id": item["thread_id"],
+                        "subject": item["subject"],
+                        "sender": item["sender"],
+                        "date": item["date_str"],
                         "document_type": "Gmail Message",
                         "connector_type": "Google Gmail",
                     }
-                    summary_content, summary_embedding = await generate_document_summary(
-                        item['markdown_content'], user_llm, document_metadata_for_summary
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["markdown_content"],
+                        user_llm,
+                        document_metadata_for_summary,
                     )
                 else:
                     summary_content = f"Google Gmail Message: {item['subject']}\n\n"
@@ -424,19 +439,19 @@ async def index_google_gmail_messages(
                         summary_content
                     )
 
-                chunks = await create_document_chunks(item['markdown_content'])
+                chunks = await create_document_chunks(item["markdown_content"])
 
                 # Update document to READY with actual content
-                document.title = item['subject']
+                document.title = item["subject"]
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = {
-                    "message_id": item['message_id'],
-                    "thread_id": item['thread_id'],
-                    "subject": item['subject'],
-                    "sender": item['sender'],
-                    "date": item['date_str'],
+                    "message_id": item["message_id"],
+                    "thread_id": item["thread_id"],
+                    "subject": item["subject"],
+                    "sender": item["sender"],
+                    "date": item["date_str"],
                     "connector_id": connector_id,
                 }
                 safe_set_chunks(document, chunks)
@@ -459,7 +474,9 @@ async def index_google_gmail_messages(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
index 038df0f46..65f56ce46 100644
--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@@ -239,23 +239,27 @@ async def index_jira_issues(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    issues_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'issue_content': issue_content,
-                        'content_hash': content_hash,
-                        'issue_id': issue_id,
-                        'issue_identifier': issue_identifier,
-                        'issue_title': issue_title,
-                        'formatted_issue': formatted_issue,
-                        'comment_count': comment_count,
-                    })
+                    issues_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "issue_content": issue_content,
+                            "content_hash": content_hash,
+                            "issue_id": issue_id,
+                            "issue_identifier": issue_identifier,
+                            "issue_title": issue_title,
+                            "formatted_issue": formatted_issue,
+                            "comment_count": comment_count,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -301,17 +305,19 @@ async def index_jira_issues(
                 session.add(document)
                 new_documents_created = True
 
-                issues_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'issue_content': issue_content,
-                    'content_hash': content_hash,
-                    'issue_id': issue_id,
-                    'issue_identifier': issue_identifier,
-                    'issue_title': issue_title,
-                    'formatted_issue': formatted_issue,
-                    'comment_count': comment_count,
-                })
+                issues_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "issue_content": issue_content,
+                        "content_hash": content_hash,
+                        "issue_id": issue_id,
+                        "issue_identifier": issue_identifier,
+                        "issue_title": issue_title,
+                        "formatted_issue": formatted_issue,
+                        "comment_count": comment_count,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
@@ -320,7 +326,9 @@ async def index_jira_issues(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -337,7 +345,7 @@ async def index_jira_issues(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -350,11 +358,11 @@ async def index_jira_issues(
 
                 if user_llm:
                     document_metadata = {
-                        "issue_key": item['issue_identifier'],
-                        "issue_title": item['issue_title'],
-                        "status": item['formatted_issue'].get("status", "Unknown"),
-                        "priority": item['formatted_issue'].get("priority", "Unknown"),
-                        "comment_count": item['comment_count'],
+                        "issue_key": item["issue_identifier"],
+                        "issue_title": item["issue_title"],
+                        "status": item["formatted_issue"].get("status", "Unknown"),
+                        "priority": item["formatted_issue"].get("priority", "Unknown"),
+                        "comment_count": item["comment_count"],
                         "document_type": "Jira Issue",
                         "connector_type": "Jira",
                     }
@@ -362,34 +370,32 @@ async def index_jira_issues(
                         summary_content,
                         summary_embedding,
                     ) = await generate_document_summary(
-                        item['issue_content'], user_llm, document_metadata
+                        item["issue_content"], user_llm, document_metadata
                     )
                 else:
                     # Fallback to simple summary if no LLM configured
                     summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n"
-                    if item['formatted_issue'].get("description"):
-                        summary_content += (
-                            f"Description: {item['formatted_issue'].get('description')}\n\n"
-                        )
+                    if item["formatted_issue"].get("description"):
+                        summary_content += f"Description: {item['formatted_issue'].get('description')}\n\n"
                     summary_content += f"Comments: {item['comment_count']}"
                     summary_embedding = config.embedding_model_instance.embed(
                         summary_content
                     )
 
                 # Process chunks - using the full issue content with comments
-                chunks = await create_document_chunks(item['issue_content'])
+                chunks = await create_document_chunks(item["issue_content"])
 
                 # Update document to READY with actual content
                 document.title = f"{item['issue_identifier']}: {item['issue_title']}"
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = {
-                    "issue_id": item['issue_id'],
-                    "issue_identifier": item['issue_identifier'],
-                    "issue_title": item['issue_title'],
-                    "state": item['formatted_issue'].get("status", "Unknown"),
-                    "comment_count": item['comment_count'],
+                    "issue_id": item["issue_id"],
+                    "issue_identifier": item["issue_identifier"],
+                    "issue_title": item["issue_title"],
+                    "state": item["formatted_issue"].get("status", "Unknown"),
+                    "comment_count": item["comment_count"],
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
@@ -416,7 +422,9 @@ async def index_jira_issues(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue  # Skip this issue and continue with others
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
index 45e1e357a..87bafb3c0 100644
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@@ -272,7 +272,9 @@ async def index_linear_issues(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         logger.info(
                             f"Document for Linear issue {issue_identifier} unchanged. Skipping."
@@ -281,19 +283,21 @@ async def index_linear_issues(
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    issues_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'issue_content': issue_content,
-                        'content_hash': content_hash,
-                        'issue_id': issue_id,
-                        'issue_identifier': issue_identifier,
-                        'issue_title': issue_title,
-                        'state': state,
-                        'description': description,
-                        'comment_count': comment_count,
-                        'priority': priority,
-                    })
+                    issues_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "issue_content": issue_content,
+                            "content_hash": content_hash,
+                            "issue_id": issue_id,
+                            "issue_identifier": issue_identifier,
+                            "issue_title": issue_title,
+                            "state": state,
+                            "description": description,
+                            "comment_count": comment_count,
+                            "priority": priority,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -338,19 +342,21 @@ async def index_linear_issues(
                 session.add(document)
                 new_documents_created = True
 
-                issues_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'issue_content': issue_content,
-                    'content_hash': content_hash,
-                    'issue_id': issue_id,
-                    'issue_identifier': issue_identifier,
-                    'issue_title': issue_title,
-                    'state': state,
-                    'description': description,
-                    'comment_count': comment_count,
-                    'priority': priority,
-                })
+                issues_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "issue_content": issue_content,
+                        "content_hash": content_hash,
+                        "issue_id": issue_id,
+                        "issue_identifier": issue_identifier,
+                        "issue_title": issue_title,
+                        "state": state,
+                        "description": description,
+                        "comment_count": comment_count,
+                        "priority": priority,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
@@ -359,7 +365,9 @@ async def index_linear_issues(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -376,7 +384,7 @@ async def index_linear_issues(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -389,20 +397,23 @@ async def index_linear_issues(
 
                 if user_llm:
                     document_metadata_for_summary = {
-                        "issue_id": item['issue_identifier'],
-                        "issue_title": item['issue_title'],
-                        "state": item['state'],
-                        "priority": item['priority'],
-                        "comment_count": item['comment_count'],
+                        "issue_id": item["issue_identifier"],
+                        "issue_title": item["issue_title"],
+                        "state": item["state"],
+                        "priority": item["priority"],
+                        "comment_count": item["comment_count"],
                         "document_type": "Linear Issue",
                         "connector_type": "Linear",
                     }
-                    summary_content, summary_embedding = await generate_document_summary(
-                        item['issue_content'], user_llm, document_metadata_for_summary
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["issue_content"], user_llm, document_metadata_for_summary
                     )
                 else:
                     # Fallback to simple summary if no LLM configured
-                    description = item['description']
+                    description = item["description"]
                     if description and len(description) > 1000:
                         description = description[:997] + "..."
                     summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n"
@@ -413,19 +424,19 @@ async def index_linear_issues(
                         summary_content
                     )
 
-                chunks = await create_document_chunks(item['issue_content'])
+                chunks = await create_document_chunks(item["issue_content"])
 
                 # Update document to READY with actual content
                 document.title = f"{item['issue_identifier']}: {item['issue_title']}"
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = {
-                    "issue_id": item['issue_id'],
-                    "issue_identifier": item['issue_identifier'],
-                    "issue_title": item['issue_title'],
-                    "state": item['state'],
-                    "comment_count": item['comment_count'],
+                    "issue_id": item["issue_id"],
+                    "issue_identifier": item["issue_identifier"],
+                    "issue_title": item["issue_title"],
+                    "state": item["state"],
+                    "comment_count": item["comment_count"],
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
@@ -452,7 +463,9 @@ async def index_linear_issues(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 skipped_issues.append(
                     f"{item.get('issue_identifier', 'Unknown')} (processing error)"
                 )
@@ -466,7 +479,9 @@ async def index_linear_issues(
         logger.info(f"Final commit: Total {documents_indexed} Linear issues processed")
         try:
             await session.commit()
-            logger.info("Successfully committed all Linear document changes to database")
+            logger.info(
+                "Successfully committed all Linear document changes to database"
+            )
         except Exception as e:
             # Handle any remaining integrity errors gracefully (race conditions, etc.)
             if (
diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
index 80d4ef3cf..04af80e53 100644
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@@ -305,7 +305,9 @@ async def index_luma_events(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         logger.info(
                             f"Document for Luma event {event_name} unchanged. Skipping."
@@ -314,23 +316,25 @@ async def index_luma_events(
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    events_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'event_id': event_id,
-                        'event_name': event_name,
-                        'event_url': event_url,
-                        'event_markdown': event_markdown,
-                        'content_hash': content_hash,
-                        'start_at': start_at,
-                        'end_at': end_at,
-                        'timezone': timezone,
-                        'location': location,
-                        'city': city,
-                        'host_names': host_names,
-                        'description': description,
-                        'cover_url': cover_url,
-                    })
+                    events_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "event_id": event_id,
+                            "event_name": event_name,
+                            "event_url": event_url,
+                            "event_markdown": event_markdown,
+                            "content_hash": content_hash,
+                            "start_at": start_at,
+                            "end_at": end_at,
+                            "timezone": timezone,
+                            "location": location,
+                            "city": city,
+                            "host_names": host_names,
+                            "description": description,
+                            "cover_url": cover_url,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -380,23 +384,25 @@ async def index_luma_events(
                 session.add(document)
                 new_documents_created = True
 
-                events_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'event_id': event_id,
-                    'event_name': event_name,
-                    'event_url': event_url,
-                    'event_markdown': event_markdown,
-                    'content_hash': content_hash,
-                    'start_at': start_at,
-                    'end_at': end_at,
-                    'timezone': timezone,
-                    'location': location,
-                    'city': city,
-                    'host_names': host_names,
-                    'description': description,
-                    'cover_url': cover_url,
-                })
+                events_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "event_id": event_id,
+                        "event_name": event_name,
+                        "event_url": event_url,
+                        "event_markdown": event_markdown,
+                        "content_hash": content_hash,
+                        "start_at": start_at,
+                        "end_at": end_at,
+                        "timezone": timezone,
+                        "location": location,
+                        "city": city,
+                        "host_names": host_names,
+                        "description": description,
+                        "cover_url": cover_url,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@@ -405,7 +411,9 @@ async def index_luma_events(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -422,7 +430,7 @@ async def index_luma_events(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -435,15 +443,15 @@ async def index_luma_events(
 
                 if user_llm:
                     document_metadata_for_summary = {
-                        "event_id": item['event_id'],
-                        "event_name": item['event_name'],
-                        "event_url": item['event_url'],
-                        "start_at": item['start_at'],
-                        "end_at": item['end_at'],
-                        "timezone": item['timezone'],
-                        "location": item['location'] or "No location",
-                        "city": item['city'],
-                        "hosts": item['host_names'],
+                        "event_id": item["event_id"],
+                        "event_name": item["event_name"],
+                        "event_url": item["event_url"],
+                        "start_at": item["start_at"],
+                        "end_at": item["end_at"],
+                        "timezone": item["timezone"],
+                        "location": item["location"] or "No location",
+                        "city": item["city"],
+                        "hosts": item["host_names"],
                         "document_type": "Luma Event",
                         "connector_type": "Luma",
                     }
@@ -451,26 +459,26 @@ async def index_luma_events(
                         summary_content,
                         summary_embedding,
                     ) = await generate_document_summary(
-                        item['event_markdown'], user_llm, document_metadata_for_summary
+                        item["event_markdown"], user_llm, document_metadata_for_summary
                     )
                 else:
                     # Fallback to simple summary if no LLM configured
                     summary_content = f"Luma Event: {item['event_name']}\n\n"
-                    if item['event_url']:
+                    if item["event_url"]:
                         summary_content += f"URL: {item['event_url']}\n"
                     summary_content += f"Start: {item['start_at']}\n"
                     summary_content += f"End: {item['end_at']}\n"
-                    if item['timezone']:
+                    if item["timezone"]:
                         summary_content += f"Timezone: {item['timezone']}\n"
-                    if item['location']:
+                    if item["location"]:
                         summary_content += f"Location: {item['location']}\n"
-                    if item['city']:
+                    if item["city"]:
                         summary_content += f"City: {item['city']}\n"
-                    if item['host_names']:
+                    if item["host_names"]:
                         summary_content += f"Hosts: {item['host_names']}\n"
-                    if item['description']:
-                        desc_preview = item['description'][:1000]
-                        if len(item['description']) > 1000:
+                    if item["description"]:
+                        desc_preview = item["description"][:1000]
+                        if len(item["description"]) > 1000:
                             desc_preview += "..."
                         summary_content += f"Description: {desc_preview}\n"
 
@@ -478,24 +486,24 @@ async def index_luma_events(
                         summary_content
                     )
 
-                chunks = await create_document_chunks(item['event_markdown'])
+                chunks = await create_document_chunks(item["event_markdown"])
 
                 # Update document to READY with actual content
-                document.title = item['event_name']
+                document.title = item["event_name"]
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = {
-                    "event_id": item['event_id'],
-                    "event_name": item['event_name'],
-                    "event_url": item['event_url'],
-                    "start_at": item['start_at'],
-                    "end_at": item['end_at'],
-                    "timezone": item['timezone'],
-                    "location": item['location'],
-                    "city": item['city'],
-                    "hosts": item['host_names'],
-                    "cover_url": item['cover_url'],
+                    "event_id": item["event_id"],
+                    "event_name": item["event_name"],
+                    "event_url": item["event_url"],
+                    "start_at": item["start_at"],
+                    "end_at": item["end_at"],
+                    "timezone": item["timezone"],
+                    "location": item["location"],
+                    "city": item["city"],
+                    "hosts": item["host_names"],
+                    "cover_url": item["cover_url"],
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
@@ -522,7 +530,9 @@ async def index_luma_events(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 skipped_events.append(
                     f"{item.get('event_name', 'Unknown')} (processing error)"
                 )
diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
index 37927b779..52704e173 100644
--- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
@@ -354,20 +354,24 @@ async def index_notion_pages(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         documents_skipped += 1
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    pages_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'markdown_content': markdown_content,
-                        'content_hash': content_hash,
-                        'page_id': page_id,
-                        'page_title': page_title,
-                    })
+                    pages_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "markdown_content": markdown_content,
+                            "content_hash": content_hash,
+                            "page_id": page_id,
+                            "page_title": page_title,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -410,14 +414,16 @@ async def index_notion_pages(
                 session.add(document)
                 new_documents_created = True
 
-                pages_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'markdown_content': markdown_content,
-                    'content_hash': content_hash,
-                    'page_id': page_id,
-                    'page_title': page_title,
-                })
+                pages_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "page_id": page_id,
+                        "page_title": page_title,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
@@ -426,7 +432,9 @@ async def index_notion_pages(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -443,7 +451,7 @@ async def index_notion_pages(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -456,13 +464,18 @@ async def index_notion_pages(
 
                 if user_llm:
                     document_metadata_for_summary = {
-                        "page_title": item['page_title'],
-                        "page_id": item['page_id'],
+                        "page_title": item["page_title"],
+                        "page_id": item["page_id"],
                         "document_type": "Notion Page",
                         "connector_type": "Notion",
                     }
-                    summary_content, summary_embedding = await generate_document_summary(
-                        item['markdown_content'], user_llm, document_metadata_for_summary
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["markdown_content"],
+                        user_llm,
+                        document_metadata_for_summary,
                     )
                 else:
                     # Fallback to simple summary if no LLM configured
@@ -471,16 +484,16 @@ async def index_notion_pages(
                         summary_content
                     )
 
-                chunks = await create_document_chunks(item['markdown_content'])
+                chunks = await create_document_chunks(item["markdown_content"])
 
                 # Update document to READY with actual content
-                document.title = item['page_title']
+                document.title = item["page_title"]
                 document.content = summary_content
-                document.content_hash = item['content_hash']
+                document.content_hash = item["content_hash"]
                 document.embedding = summary_embedding
                 document.document_metadata = {
-                    "page_title": item['page_title'],
-                    "page_id": item['page_id'],
+                    "page_title": item["page_title"],
+                    "page_id": item["page_id"],
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
@@ -504,7 +517,9 @@ async def index_notion_pages(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 skipped_pages.append(f"{item['page_title']} (processing error)")
                 documents_failed += 1
                 continue
diff --git a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
index 0e6934e2c..6dea1a730 100644
--- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
@@ -382,27 +382,31 @@ async def index_obsidian_vault(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         logger.debug(f"Note {title} unchanged, skipping")
                         skipped_count += 1
                         continue
 
                     # Queue existing document for update (will be set to processing in Phase 2)
-                    files_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'file_info': file_info,
-                        'content': content,
-                        'body_content': body_content,
-                        'frontmatter': frontmatter,
-                        'wiki_links': wiki_links,
-                        'tags': tags,
-                        'title': title,
-                        'relative_path': relative_path,
-                        'content_hash': content_hash,
-                        'unique_identifier_hash': unique_identifier_hash,
-                    })
+                    files_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "file_info": file_info,
+                            "content": content,
+                            "body_content": body_content,
+                            "frontmatter": frontmatter,
+                            "wiki_links": wiki_links,
+                            "tags": tags,
+                            "title": title,
+                            "relative_path": relative_path,
+                            "content_hash": content_hash,
+                            "unique_identifier_hash": unique_identifier_hash,
+                        }
+                    )
                     continue
 
                 # Document doesn't exist by unique_identifier_hash
@@ -445,20 +449,22 @@ async def index_obsidian_vault(
                 session.add(document)
                 new_documents_created = True
 
-                files_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'file_info': file_info,
-                    'content': content,
-                    'body_content': body_content,
-                    'frontmatter': frontmatter,
-                    'wiki_links': wiki_links,
-                    'tags': tags,
-                    'title': title,
-                    'relative_path': relative_path,
-                    'content_hash': content_hash,
-                    'unique_identifier_hash': unique_identifier_hash,
-                })
+                files_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "file_info": file_info,
+                        "content": content,
+                        "body_content": body_content,
+                        "frontmatter": frontmatter,
+                        "wiki_links": wiki_links,
+                        "tags": tags,
+                        "title": title,
+                        "relative_path": relative_path,
+                        "content_hash": content_hash,
+                        "unique_identifier_hash": unique_identifier_hash,
+                    }
+                )
 
             except Exception as e:
                 logger.exception(
@@ -469,7 +475,9 @@ async def index_obsidian_vault(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -491,22 +499,22 @@ async def index_obsidian_vault(
                     await on_heartbeat_callback(indexed_count)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
                 await session.commit()
 
                 # Extract data from item
-                title = item['title']
-                relative_path = item['relative_path']
-                content = item['content']
-                body_content = item['body_content']
-                frontmatter = item['frontmatter']
-                wiki_links = item['wiki_links']
-                tags = item['tags']
-                content_hash = item['content_hash']
-                file_info = item['file_info']
+                title = item["title"]
+                relative_path = item["relative_path"]
+                content = item["content"]
+                body_content = item["body_content"]
+                frontmatter = item["frontmatter"]
+                wiki_links = item["wiki_links"]
+                tags = item["tags"]
+                content_hash = item["content_hash"]
+                file_info = item["file_info"]
 
                 # Build metadata
                 document_metadata = {
@@ -584,7 +592,9 @@ async def index_obsidian_vault(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 failed_count += 1
                 continue
 
@@ -592,9 +602,7 @@ async def index_obsidian_vault(
         await update_connector_last_indexed(session, connector, update_last_indexed)
 
         # Final commit for any remaining documents not yet committed in batches
-        logger.info(
-            f"Final commit: Total {indexed_count} Obsidian notes processed"
-        )
+        logger.info(f"Final commit: Total {indexed_count} Obsidian notes processed")
         try:
             await session.commit()
             logger.info(
diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
index 61faa39b3..111552fa6 100644
--- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
@@ -314,7 +314,9 @@ async def index_slack_messages(
                         # Document exists - check if content has changed
                         if existing_document.content_hash == content_hash:
                             # Ensure status is ready (might have been stuck in processing/pending)
-                            if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                            if not DocumentStatus.is_state(
+                                existing_document.status, DocumentStatus.READY
+                            ):
                                 existing_document.status = DocumentStatus.ready()
                             logger.info(
                                 f"Document for Slack message {msg_ts} in channel {channel_name} unchanged. Skipping."
@@ -323,18 +325,20 @@ async def index_slack_messages(
                             continue
 
                         # Queue existing document for update (will be set to processing in Phase 2)
-                        messages_to_process.append({
-                            'document': existing_document,
-                            'is_new': False,
-                            'combined_document_string': combined_document_string,
-                            'content_hash': content_hash,
-                            'channel_name': channel_name,
-                            'channel_id': channel_id,
-                            'msg_ts': msg_ts,
-                            'start_date': start_date_str,
-                            'end_date': end_date_str,
-                            'message_count': len(formatted_messages),
-                        })
+                        messages_to_process.append(
+                            {
+                                "document": existing_document,
+                                "is_new": False,
+                                "combined_document_string": combined_document_string,
+                                "content_hash": content_hash,
+                                "channel_name": channel_name,
+                                "channel_id": channel_id,
+                                "msg_ts": msg_ts,
+                                "start_date": start_date_str,
+                                "end_date": end_date_str,
+                                "message_count": len(formatted_messages),
+                            }
+                        )
                         continue
 
                     # Document doesn't exist by unique_identifier_hash
@@ -377,18 +381,20 @@ async def index_slack_messages(
                     session.add(document)
                     new_documents_created = True
 
-                    messages_to_process.append({
-                        'document': document,
-                        'is_new': True,
-                        'combined_document_string': combined_document_string,
-                        'content_hash': content_hash,
-                        'channel_name': channel_name,
-                        'channel_id': channel_id,
-                        'msg_ts': msg_ts,
-                        'start_date': start_date_str,
-                        'end_date': end_date_str,
-                        'message_count': len(formatted_messages),
-                    })
+                    messages_to_process.append(
+                        {
+                            "document": document,
+                            "is_new": True,
+                            "combined_document_string": combined_document_string,
+                            "content_hash": content_hash,
+                            "channel_name": channel_name,
+                            "channel_id": channel_id,
+                            "msg_ts": msg_ts,
+                            "start_date": start_date_str,
+                            "end_date": end_date_str,
+                            "message_count": len(formatted_messages),
+                        }
+                    )
 
                 logger.info(
                     f"Phase 1: Collected {len(formatted_messages)} messages from channel {channel_name}"
@@ -409,7 +415,9 @@ async def index_slack_messages(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -426,29 +434,29 @@ async def index_slack_messages(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
                 await session.commit()
 
                 # Heavy processing (embeddings, chunks)
-                chunks = await create_document_chunks(item['combined_document_string'])
+                chunks = await create_document_chunks(item["combined_document_string"])
                 doc_embedding = config.embedding_model_instance.embed(
-                    item['combined_document_string']
+                    item["combined_document_string"]
                 )
 
                 # Update document to READY with actual content
-                document.title = item['channel_name']
-                document.content = item['combined_document_string']
-                document.content_hash = item['content_hash']
+                document.title = item["channel_name"]
+                document.content = item["combined_document_string"]
+                document.content_hash = item["content_hash"]
                 document.embedding = doc_embedding
                 document.document_metadata = {
-                    "channel_name": item['channel_name'],
-                    "channel_id": item['channel_id'],
-                    "start_date": item['start_date'],
-                    "end_date": item['end_date'],
-                    "message_count": item['message_count'],
+                    "channel_name": item["channel_name"],
+                    "channel_id": item["channel_id"],
+                    "start_date": item["start_date"],
+                    "end_date": item["end_date"],
+                    "message_count": item["message_count"],
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
@@ -475,7 +483,9 @@ async def index_slack_messages(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
 
diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
index 27259fd6f..1b13a2c37 100644
--- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py
@@ -332,25 +332,31 @@ async def index_teams_messages(
                                 # Document exists - check if content has changed
                                 if existing_document.content_hash == content_hash:
                                     # Ensure status is ready (might have been stuck in processing/pending)
-                                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
-                                        existing_document.status = DocumentStatus.ready()
+                                    if not DocumentStatus.is_state(
+                                        existing_document.status, DocumentStatus.READY
+                                    ):
+                                        existing_document.status = (
+                                            DocumentStatus.ready()
+                                        )
                                     documents_skipped += 1
                                     continue
 
                                 # Queue existing document for update (will be set to processing in Phase 2)
-                                messages_to_process.append({
-                                    'document': existing_document,
-                                    'is_new': False,
-                                    'combined_document_string': combined_document_string,
-                                    'content_hash': content_hash,
-                                    'team_name': team_name,
-                                    'team_id': team_id,
-                                    'channel_name': channel_name,
-                                    'channel_id': channel_id,
-                                    'message_id': message_id,
-                                    'start_date': start_date_str,
-                                    'end_date': end_date_str,
-                                })
+                                messages_to_process.append(
+                                    {
+                                        "document": existing_document,
+                                        "is_new": False,
+                                        "combined_document_string": combined_document_string,
+                                        "content_hash": content_hash,
+                                        "team_name": team_name,
+                                        "team_id": team_id,
+                                        "channel_name": channel_name,
+                                        "channel_id": channel_id,
+                                        "message_id": message_id,
+                                        "start_date": start_date_str,
+                                        "end_date": end_date_str,
+                                    }
+                                )
                                 continue
 
                             # Document doesn't exist by unique_identifier_hash
@@ -400,19 +406,21 @@ async def index_teams_messages(
                             session.add(document)
                             new_documents_created = True
 
-                            messages_to_process.append({
-                                'document': document,
-                                'is_new': True,
-                                'combined_document_string': combined_document_string,
-                                'content_hash': content_hash,
-                                'team_name': team_name,
-                                'team_id': team_id,
-                                'channel_name': channel_name,
-                                'channel_id': channel_id,
-                                'message_id': message_id,
-                                'start_date': start_date_str,
-                                'end_date': end_date_str,
-                            })
+                            messages_to_process.append(
+                                {
+                                    "document": document,
+                                    "is_new": True,
+                                    "combined_document_string": combined_document_string,
+                                    "content_hash": content_hash,
+                                    "team_name": team_name,
+                                    "team_id": team_id,
+                                    "channel_name": channel_name,
+                                    "channel_id": channel_id,
+                                    "message_id": message_id,
+                                    "start_date": start_date_str,
+                                    "end_date": end_date_str,
+                                }
+                            )
 
                     except Exception as e:
                         logger.error(
@@ -432,7 +440,9 @@ async def index_teams_messages(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -449,30 +459,30 @@ async def index_teams_messages(
                     await on_heartbeat_callback(documents_indexed)
                     last_heartbeat_time = current_time
 
-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
                 await session.commit()
 
                 # Heavy processing (embeddings, chunks)
-                chunks = await create_document_chunks(item['combined_document_string'])
+                chunks = await create_document_chunks(item["combined_document_string"])
                 doc_embedding = config.embedding_model_instance.embed(
-                    item['combined_document_string']
+                    item["combined_document_string"]
                 )
 
                 # Update document to READY with actual content
                 document.title = f"{item['team_name']} - {item['channel_name']}"
-                document.content = item['combined_document_string']
-                document.content_hash = item['content_hash']
+                document.content = item["combined_document_string"]
+                document.content_hash = item["content_hash"]
                 document.embedding = doc_embedding
                 document.document_metadata = {
-                    "team_name": item['team_name'],
-                    "team_id": item['team_id'],
-                    "channel_name": item['channel_name'],
-                    "channel_id": item['channel_id'],
-                    "start_date": item['start_date'],
-                    "end_date": item['end_date'],
+                    "team_name": item["team_name"],
+                    "team_id": item["team_id"],
+                    "channel_name": item["channel_name"],
+                    "channel_id": item["channel_id"],
+                    "start_date": item["start_date"],
+                    "end_date": item["end_date"],
                     "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     "connector_id": connector_id,
                 }
@@ -497,7 +507,9 @@ async def index_teams_messages(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
 
@@ -510,9 +522,7 @@ async def index_teams_messages(
         )
         try:
             await session.commit()
-            logger.info(
-                "Successfully committed all Teams document changes to database"
-            )
+            logger.info("Successfully committed all Teams document changes to database")
         except Exception as e:
             # Handle any remaining integrity errors gracefully (race conditions, etc.)
             if (
diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
index 5d25b4623..5b3fa02b0 100644
--- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
@@ -184,22 +184,28 @@ async def index_crawled_urls(
 
                 if existing_document:
                     # Document exists - check if it's already being processed
-                    if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING):
+                    if DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.PENDING
+                    ):
                         logger.info(f"URL {url} already pending. Skipping.")
                         documents_skipped += 1
                         continue
-                    if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING):
+                    if DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.PROCESSING
+                    ):
                         logger.info(f"URL {url} already processing. Skipping.")
                         documents_skipped += 1
                         continue
 
                     # Queue existing document for potential update check
-                    urls_to_process.append({
-                        'document': existing_document,
-                        'is_new': False,
-                        'url': url,
-                        'unique_identifier_hash': unique_identifier_hash,
-                    })
+                    urls_to_process.append(
+                        {
+                            "document": existing_document,
+                            "is_new": False,
+                            "url": url,
+                            "unique_identifier_hash": unique_identifier_hash,
+                        }
+                    )
                     continue
 
                 # Create new document with PENDING status (visible in UI immediately)
@@ -224,12 +230,14 @@ async def index_crawled_urls(
                 session.add(document)
                 new_documents_created = True
 
-                urls_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'url': url,
-                    'unique_identifier_hash': unique_identifier_hash,
-                })
+                urls_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "url": url,
+                        "unique_identifier_hash": unique_identifier_hash,
+                    }
+                )
 
             except Exception as e:
                 logger.error(f"Error in Phase 1 for URL {url}: {e!s}", exc_info=True)
@@ -238,7 +246,9 @@ async def index_crawled_urls(
 
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents"
+            )
             await session.commit()
 
         # =======================================================================
@@ -255,9 +265,9 @@ async def index_crawled_urls(
                     await on_heartbeat_callback(documents_indexed + documents_updated)
                     last_heartbeat_time = current_time
 
-            document = item['document']
-            url = item['url']
-            is_new = item['is_new']
+            document = item["document"]
+            url = item["url"]
+            is_new = item["is_new"]
 
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
@@ -298,7 +308,9 @@ async def index_crawled_urls(
                     continue
 
                 # Format content as structured document for summary generation
-                structured_document = crawler.format_to_structured_document(crawl_result)
+                structured_document = crawler.format_to_structured_document(
+                    crawl_result
+                )
 
                 # Generate content hash using a version WITHOUT metadata
                 structured_document_for_hash = crawler.format_to_structured_document(
@@ -339,7 +351,9 @@ async def index_crawled_urls(
                             f"(existing document ID: {duplicate_by_content.id}). "
                             f"Marking as failed."
                         )
-                        document.status = DocumentStatus.failed("Duplicate content exists")
+                        document.status = DocumentStatus.failed(
+                            "Duplicate content exists"
+                        )
                         document.updated_at = get_current_timestamp()
                         await session.commit()
                         duplicate_content_count += 1
@@ -360,7 +374,10 @@ async def index_crawled_urls(
                         "document_type": "Crawled URL",
                         "crawler_type": crawler_type,
                     }
-                    summary_content, summary_embedding = await generate_document_summary(
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
                         structured_document, user_llm, document_metadata_for_summary
                     )
                 else:
@@ -423,7 +440,9 @@ async def index_crawled_urls(
                     document.updated_at = get_current_timestamp()
                     await session.commit()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
 
@@ -438,7 +457,9 @@ async def index_crawled_urls(
         )
         try:
             await session.commit()
-            logger.info("Successfully committed all webcrawler document changes to database")
+            logger.info(
+                "Successfully committed all webcrawler document changes to database"
+            )
         except Exception as e:
             # Handle any remaining integrity errors gracefully
             if "duplicate key value violates unique constraint" in str(e).lower():
diff --git a/surfsense_backend/app/tasks/document_processors/base.py b/surfsense_backend/app/tasks/document_processors/base.py
index c8046868c..2047ec63d 100644
--- a/surfsense_backend/app/tasks/document_processors/base.py
+++ b/surfsense_backend/app/tasks/document_processors/base.py
@@ -17,29 +17,30 @@ md = MarkdownifyTransformer()
 def safe_set_chunks(document: Document, chunks: list) -> None:
     """
     Safely assign chunks to a document without triggering lazy loading.
-    
+
     ALWAYS use this instead of `document.chunks = chunks` to avoid
     SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
-    
+
     Why this is needed:
     - Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
       load the OLD chunks first (for comparison/orphan detection)
     - This lazy loading fails in async context with asyncpg driver
     - set_committed_value bypasses this by setting the value directly
-    
+
     This function is safe regardless of how the document was loaded
     (with or without selectinload).
-    
+
     Args:
         document: The Document object to update
         chunks: List of Chunk objects to assign
-    
+
     Example:
         # Instead of: document.chunks = chunks (DANGEROUS!)
         safe_set_chunks(document, chunks)  # Always safe
     """
     from sqlalchemy.orm.attributes import set_committed_value
-    set_committed_value(document, 'chunks', chunks)
+
+    set_committed_value(document, "chunks", chunks)
 
 
 def get_current_timestamp() -> datetime:
diff --git a/surfsense_backend/app/tasks/document_processors/circleback_processor.py b/surfsense_backend/app/tasks/document_processors/circleback_processor.py
index e9c395c83..a513bcaf0 100644
--- a/surfsense_backend/app/tasks/document_processors/circleback_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/circleback_processor.py
@@ -91,7 +91,9 @@ async def add_circleback_meeting_document(
             # Document exists - check if content has changed
             if existing_document.content_hash == content_hash:
                 # Ensure status is ready (might have been stuck in processing/pending)
-                if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                if not DocumentStatus.is_state(
+                    existing_document.status, DocumentStatus.READY
+                ):
                     existing_document.status = DocumentStatus.ready()
                     await session.commit()
                 logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.")
@@ -110,7 +112,7 @@ async def add_circleback_meeting_document(
             # PHASE 1: Create document with PENDING status
             # This makes the document visible in the UI immediately
             # =======================================================================
-            
+
             # Fetch the user who set up the Circleback connector (preferred)
             # or fall back to search space owner if no connector found
             created_by_user_id = None
@@ -173,7 +175,7 @@ async def add_circleback_meeting_document(
         # =======================================================================
         # PHASE 3: Process the document content
         # =======================================================================
-        
+
         # Get LLM for generating summary
         llm = await get_document_summary_llm(session, search_space_id)
         if not llm:
@@ -243,7 +245,7 @@ async def add_circleback_meeting_document(
 
         await session.commit()
         await session.refresh(document)
-        
+
         if existing_document:
             logger.info(
                 f"Updated Circleback meeting document {meeting_id} in search space {search_space_id}"
@@ -267,7 +269,9 @@ async def add_circleback_meeting_document(
                 document.updated_at = get_current_timestamp()
                 await session.commit()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
         raise db_error
     except Exception as e:
         await session.rollback()
@@ -279,5 +283,7 @@ async def add_circleback_meeting_document(
                 document.updated_at = get_current_timestamp()
                 await session.commit()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
         raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e
diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py
index e14dc3f42..3fa57e998 100644
--- a/surfsense_backend/app/tasks/document_processors/file_processors.py
+++ b/surfsense_backend/app/tasks/document_processors/file_processors.py
@@ -1629,16 +1629,16 @@ async def process_file_in_background_with_document(
 ) -> Document | None:
     """
     Process file and update existing pending document (2-phase pattern).
-    
+
     This function is Phase 2 of the real-time document status updates:
     - Phase 1 (API): Created document with pending status
     - Phase 2 (this): Process file and update document to ready/failed
-    
+
     The document already exists with pending status. This function:
     1. Parses the file content (markdown, audio, or ETL services)
     2. Updates the document with content, embeddings, and chunks
     3. Sets status to 'ready' on success
-    
+
     Args:
         document: Existing document with pending status
         file_path: Path to the uploaded file
@@ -1650,7 +1650,7 @@ async def process_file_in_background_with_document(
         log_entry: Log entry for this task
         connector: Optional connector info for Google Drive files
         notification: Optional notification for progress updates
-    
+
     Returns:
         Updated Document object if successful, None if duplicate content detected
     """
@@ -1665,13 +1665,18 @@ async def process_file_in_background_with_document(
         etl_service = None
 
         # ===== STEP 1: Parse file content based on type =====
-        
+
         # Check if the file is a markdown or text file
         if filename.lower().endswith((".md", ".markdown", ".txt")):
             # Update notification: parsing stage
             if notification:
-                await NotificationService.document_processing.notify_processing_progress(
-                    session, notification, stage="parsing", stage_message="Reading file"
+                await (
+                    NotificationService.document_processing.notify_processing_progress(
+                        session,
+                        notification,
+                        stage="parsing",
+                        stage_message="Reading file",
+                    )
                 )
 
             await task_logger.log_task_progress(
@@ -1695,8 +1700,13 @@ async def process_file_in_background_with_document(
         ):
             # Update notification: parsing stage (transcription)
             if notification:
-                await NotificationService.document_processing.notify_processing_progress(
-                    session, notification, stage="parsing", stage_message="Transcribing audio"
+                await (
+                    NotificationService.document_processing.notify_processing_progress(
+                        session,
+                        notification,
+                        stage="parsing",
+                        stage_message="Transcribing audio",
+                    )
                 )
 
             await task_logger.log_task_progress(
@@ -1708,7 +1718,8 @@ async def process_file_in_background_with_document(
             # Transcribe audio
             stt_service_type = (
                 "local"
-                if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/")
+                if app_config.STT_SERVICE
+                and app_config.STT_SERVICE.startswith("local/")
                 else "external"
             )
 
@@ -1719,7 +1730,9 @@ async def process_file_in_background_with_document(
                 transcribed_text = result.get("text", "")
                 if not transcribed_text:
                     raise ValueError("Transcription returned empty text")
-                markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}"
+                markdown_content = (
+                    f"# Transcription of {filename}\n\n{transcribed_text}"
+                )
             else:
                 with open(file_path, "rb") as audio_file:
                     transcription_kwargs = {
@@ -1728,12 +1741,18 @@ async def process_file_in_background_with_document(
                         "api_key": app_config.STT_SERVICE_API_KEY,
                     }
                     if app_config.STT_SERVICE_API_BASE:
-                        transcription_kwargs["api_base"] = app_config.STT_SERVICE_API_BASE
-                    transcription_response = await atranscription(**transcription_kwargs)
+                        transcription_kwargs["api_base"] = (
+                            app_config.STT_SERVICE_API_BASE
+                        )
+                    transcription_response = await atranscription(
+                        **transcription_kwargs
+                    )
                     transcribed_text = transcription_response.get("text", "")
                     if not transcribed_text:
                         raise ValueError("Transcription returned empty text")
-                markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}"
+                markdown_content = (
+                    f"# Transcription of {filename}\n\n{transcribed_text}"
+                )
 
             etl_service = "AUDIO_TRANSCRIPTION"
             # Clean up temp file
@@ -1742,13 +1761,18 @@ async def process_file_in_background_with_document(
 
         else:
             # Document files - use ETL service
-            from app.services.page_limit_service import PageLimitExceededError, PageLimitService
+            from app.services.page_limit_service import (
+                PageLimitExceededError,
+                PageLimitService,
+            )
 
             page_limit_service = PageLimitService(session)
 
             # Estimate page count
             try:
-                estimated_pages = page_limit_service.estimate_pages_before_processing(file_path)
+                estimated_pages = page_limit_service.estimate_pages_before_processing(
+                    file_path
+                )
             except Exception:
                 file_size = os.path.getsize(file_path)
                 estimated_pages = max(1, file_size // (80 * 1024))
@@ -1759,14 +1783,22 @@ async def process_file_in_background_with_document(
             if app_config.ETL_SERVICE == "UNSTRUCTURED":
                 if notification:
                     await NotificationService.document_processing.notify_processing_progress(
-                        session, notification, stage="parsing", stage_message="Extracting content"
+                        session,
+                        notification,
+                        stage="parsing",
+                        stage_message="Extracting content",
                     )
 
                 from langchain_unstructured import UnstructuredLoader
 
                 loader = UnstructuredLoader(
-                    file_path, mode="elements", post_processors=[], languages=["eng"],
-                    include_orig_elements=False, include_metadata=False, strategy="auto"
+                    file_path,
+                    mode="elements",
+                    post_processors=[],
+                    languages=["eng"],
+                    include_orig_elements=False,
+                    include_metadata=False,
+                    strategy="auto",
                 )
                 docs = await loader.aload()
                 markdown_content = await convert_document_to_markdown(docs)
@@ -1775,37 +1807,55 @@ async def process_file_in_background_with_document(
                 etl_service = "UNSTRUCTURED"
 
                 # Update page usage
-                await page_limit_service.update_page_usage(user_id, final_page_count, allow_exceed=True)
+                await page_limit_service.update_page_usage(
+                    user_id, final_page_count, allow_exceed=True
+                )
 
             elif app_config.ETL_SERVICE == "LLAMACLOUD":
                 if notification:
                     await NotificationService.document_processing.notify_processing_progress(
-                        session, notification, stage="parsing", stage_message="Extracting content"
+                        session,
+                        notification,
+                        stage="parsing",
+                        stage_message="Extracting content",
                     )
 
                 result = await parse_with_llamacloud_retry(
-                    file_path=file_path, estimated_pages=estimated_pages,
-                    task_logger=task_logger, log_entry=log_entry
+                    file_path=file_path,
+                    estimated_pages=estimated_pages,
+                    task_logger=task_logger,
+                    log_entry=log_entry,
+                )
+                markdown_documents = await result.aget_markdown_documents(
+                    split_by_page=False
                 )
-                markdown_documents = await result.aget_markdown_documents(split_by_page=False)
                 if not markdown_documents:
-                    raise RuntimeError(f"LlamaCloud parsing returned no documents: {filename}")
+                    raise RuntimeError(
+                        f"LlamaCloud parsing returned no documents: {filename}"
+                    )
                 markdown_content = markdown_documents[0].text
                 etl_service = "LLAMACLOUD"
 
                 # Update page usage
-                await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True)
+                await page_limit_service.update_page_usage(
+                    user_id, estimated_pages, allow_exceed=True
+                )
 
             elif app_config.ETL_SERVICE == "DOCLING":
                 if notification:
                     await NotificationService.document_processing.notify_processing_progress(
-                        session, notification, stage="parsing", stage_message="Extracting content"
+                        session,
+                        notification,
+                        stage="parsing",
+                        stage_message="Extracting content",
                     )
 
                 # Suppress logging during Docling import
                 getLogger("docling.pipeline.base_pipeline").setLevel(ERROR)
                 getLogger("docling.document_converter").setLevel(ERROR)
-                getLogger("docling_core.transforms.chunker.hierarchical_chunker").setLevel(ERROR)
+                getLogger(
+                    "docling_core.transforms.chunker.hierarchical_chunker"
+                ).setLevel(ERROR)
 
                 from docling.document_converter import DocumentConverter
 
@@ -1815,7 +1865,9 @@ async def process_file_in_background_with_document(
                 etl_service = "DOCLING"
 
                 # Update page usage
-                await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True)
+                await page_limit_service.update_page_usage(
+                    user_id, estimated_pages, allow_exceed=True
+                )
 
             else:
                 raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}")
@@ -1829,7 +1881,7 @@ async def process_file_in_background_with_document(
 
         # ===== STEP 2: Check for duplicate content =====
         content_hash = generate_content_hash(markdown_content, search_space_id)
-        
+
         existing_by_content = await check_duplicate_document(session, content_hash)
         if existing_by_content and existing_by_content.id != document.id:
             # Duplicate content found - mark this document as failed
@@ -1846,7 +1898,7 @@ async def process_file_in_background_with_document(
             )
 
         user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
-        
+
         if user_llm:
             document_metadata = {
                 "file_name": filename,
@@ -1881,10 +1933,10 @@ async def process_file_in_background_with_document(
             **(document.document_metadata or {}),
         }
         flag_modified(document, "document_metadata")
-        
+
         # Use safe_set_chunks to avoid async issues
         safe_set_chunks(document, chunks)
-        
+
         document.blocknote_document = blocknote_json
         document.content_needs_reindexing = False
         document.updated_at = get_current_timestamp()
@@ -1922,7 +1974,11 @@ async def process_file_in_background_with_document(
             log_entry,
             error_message,
             str(e),
-            {"error_type": type(e).__name__, "filename": filename, "document_id": document.id},
+            {
+                "error_type": type(e).__name__,
+                "filename": filename,
+                "document_id": document.id,
+            },
         )
         logging.error(f"Error processing file with document: {error_message}")
         raise
diff --git a/surfsense_backend/app/tasks/document_processors/youtube_processor.py b/surfsense_backend/app/tasks/document_processors/youtube_processor.py
index 19092b592..e83d7c855 100644
--- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py
@@ -136,11 +136,19 @@ async def add_youtube_video_document(
             document = existing_document
             is_new_document = False
             # Check if already being processed
-            if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING):
-                logging.info(f"YouTube video {video_id} already pending. Returning existing.")
+            if DocumentStatus.is_state(
+                existing_document.status, DocumentStatus.PENDING
+            ):
+                logging.info(
+                    f"YouTube video {video_id} already pending. Returning existing."
+                )
                 return existing_document
-            if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING):
-                logging.info(f"YouTube video {video_id} already processing. Returning existing.")
+            if DocumentStatus.is_state(
+                existing_document.status, DocumentStatus.PROCESSING
+            ):
+                logging.info(
+                    f"YouTube video {video_id} already processing. Returning existing."
+                )
                 return existing_document
         else:
             # Create new document with PENDING status (visible in UI immediately)
@@ -300,7 +308,9 @@ async def add_youtube_video_document(
                     "video_id": video_id,
                 },
             )
-            logging.info(f"Document for YouTube video {video_id} unchanged. Marking as ready.")
+            logging.info(
+                f"Document for YouTube video {video_id} unchanged. Marking as ready."
+            )
             document.status = DocumentStatus.ready()
             await session.commit()
             return document
@@ -408,7 +418,9 @@ async def add_youtube_video_document(
         # Mark document as failed if it exists
         if document:
             try:
-                document.status = DocumentStatus.failed(f"Database error: {str(db_error)[:150]}")
+                document.status = DocumentStatus.failed(
+                    f"Database error: {str(db_error)[:150]}"
+                )
                 document.updated_at = get_current_timestamp()
                 await session.commit()
             except Exception:
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
index 2bba85085..b214c96be 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx
@@ -38,7 +38,9 @@ export function DocumentTypeChip({ type, className }: { type: string; className?
 			className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground max-w-full overflow-hidden ${className ?? ""}`}
 		>
 			<span className="opacity-80 flex-shrink-0">{icon}</span>
-			<span ref={textRef} className="truncate min-w-0">{fullLabel}</span>
+			<span ref={textRef} className="truncate min-w-0">
+				{fullLabel}
+			</span>
 		</span>
 	);
 
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
index 028f38098..6bd5f8460 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
@@ -68,9 +68,7 @@ export function DocumentsFilters({
 	const filteredTypes = useMemo(() => {
 		if (!typeSearchQuery.trim()) return uniqueTypes;
 		const query = typeSearchQuery.toLowerCase();
-		return uniqueTypes.filter((type) =>
-			getDocumentTypeLabel(type).toLowerCase().includes(query)
-		);
+		return uniqueTypes.filter((type) => getDocumentTypeLabel(type).toLowerCase().includes(query));
 	}, [uniqueTypes, typeSearchQuery]);
 
 	const typeCounts = useMemo(() => {
@@ -156,94 +154,95 @@ export function DocumentsFilters({
 
 				{/* Filter Buttons Group */}
 				<div className="flex items-center gap-2 flex-wrap">
-				{/* Type Filter */}
-				<Popover>
-					<PopoverTrigger asChild>
-						<Button
-							variant="outline"
-							size="sm"
-							className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
-						>
-							<FileType size={14} className="text-muted-foreground" />
-							<span className="hidden sm:inline">Type</span>
-							{activeTypes.length > 0 && (
-								<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
-									{activeTypes.length}
-								</span>
-							)}
-						</Button>
-					</PopoverTrigger>
-					<PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
-						<div>
-							{/* Search input */}
-							<div className="p-2 border-b border-border/50">
-								<div className="relative">
-									<Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
-									<Input
-										placeholder="Search types..."
-										value={typeSearchQuery}
-										onChange={(e) => setTypeSearchQuery(e.target.value)}
-										className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0"
-									/>
-								</div>
-							</div>
-
-							<div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
-								{filteredTypes.length === 0 ? (
-									<div className="py-6 text-center text-sm text-muted-foreground">
-										No types found
+					{/* Type Filter */}
+					<Popover>
+						<PopoverTrigger asChild>
+							<Button
+								variant="outline"
+								size="sm"
+								className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
+							>
+								<FileType size={14} className="text-muted-foreground" />
+								<span className="hidden sm:inline">Type</span>
+								{activeTypes.length > 0 && (
+									<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
+										{activeTypes.length}
+									</span>
+								)}
+							</Button>
+						</PopoverTrigger>
+						<PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
+							<div>
+								{/* Search input */}
+								<div className="p-2 border-b border-border/50">
+									<div className="relative">
+										<Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
+										<Input
+											placeholder="Search types..."
+											value={typeSearchQuery}
+											onChange={(e) => setTypeSearchQuery(e.target.value)}
+											className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0"
+										/>
 									</div>
-								) : (
-									filteredTypes.map((value: DocumentTypeEnum, i) => (
-										<button
-											key={value}
-											type="button"
-											className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
-											onClick={() => onToggleType(value, !activeTypes.includes(value))}
+								</div>
+
+								<div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
+									{filteredTypes.length === 0 ? (
+										<div className="py-6 text-center text-sm text-muted-foreground">
+											No types found
+										</div>
+									) : (
+										filteredTypes.map((value: DocumentTypeEnum, i) => (
+											<button
+												key={value}
+												type="button"
+												className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
+												onClick={() => onToggleType(value, !activeTypes.includes(value))}
+											>
+												{/* Icon */}
+												<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80">
+													{getDocumentTypeIcon(value, "h-4 w-4")}
+												</div>
+												{/* Text content */}
+												<div className="flex flex-col min-w-0 flex-1 gap-0.5">
+													<span className="text-[13px] font-medium text-foreground truncate leading-tight">
+														{getDocumentTypeLabel(value)}
+													</span>
+													<span className="text-[11px] text-muted-foreground leading-tight">
+														{typeCounts.get(value)} document
+														{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
+													</span>
+												</div>
+												{/* Checkbox */}
+												<Checkbox
+													id={`${id}-${i}`}
+													checked={activeTypes.includes(value)}
+													onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
+													className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
+												/>
+											</button>
+										))
+									)}
+								</div>
+								{activeTypes.length > 0 && (
+									<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50">
+										<Button
+											variant="ghost"
+											size="sm"
+											className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
+											onClick={() => {
+												activeTypes.forEach((t) => {
+													onToggleType(t, false);
+												});
+											}}
 										>
-											{/* Icon */}
-											<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80">
-												{getDocumentTypeIcon(value, "h-4 w-4")}
-											</div>
-											{/* Text content */}
-											<div className="flex flex-col min-w-0 flex-1 gap-0.5">
-												<span className="text-[13px] font-medium text-foreground truncate leading-tight">
-													{getDocumentTypeLabel(value)}
-												</span>
-												<span className="text-[11px] text-muted-foreground leading-tight">
-													{typeCounts.get(value)} document{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
-												</span>
-											</div>
-											{/* Checkbox */}
-											<Checkbox
-												id={`${id}-${i}`}
-												checked={activeTypes.includes(value)}
-												onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
-												className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
-											/>
-										</button>
-									))
+											Clear filters
+										</Button>
+									</div>
 								)}
 							</div>
-							{activeTypes.length > 0 && (
-								<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50">
-									<Button
-										variant="ghost"
-										size="sm"
-										className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
-										onClick={() => {
-											activeTypes.forEach((t) => {
-												onToggleType(t, false);
-											});
-										}}
-									>
-										Clear filters
-									</Button>
-								</div>
-							)}
-						</div>
-					</PopoverContent>
-				</Popover>
+						</PopoverContent>
+					</Popover>
 
 					{/* Bulk Delete Button */}
 					{selectedIds.size > 0 && (
@@ -255,22 +254,14 @@ export function DocumentsFilters({
 									exit={{ opacity: 0, scale: 0.9 }}
 								>
 									{/* Mobile: icon with count */}
-									<Button
-										variant="destructive"
-										size="sm"
-										className="h-9 gap-1.5 px-2.5 md:hidden"
-									>
+									<Button variant="destructive" size="sm" className="h-9 gap-1.5 px-2.5 md:hidden">
 										<Trash size={14} />
 										<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
 											{selectedIds.size}
 										</span>
 									</Button>
 									{/* Desktop: full button */}
-									<Button
-										variant="destructive"
-										size="sm"
-										className="h-9 gap-2 hidden md:flex"
-									>
+									<Button variant="destructive" size="sm" className="h-9 gap-2 hidden md:flex">
 										<Trash size={14} />
 										Delete
 										<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
@@ -288,9 +279,12 @@ export function DocumentsFilters({
 										<CircleAlert size={18} strokeWidth={2} />
 									</div>
 									<AlertDialogHeader className="flex-1">
-										<AlertDialogTitle>Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?</AlertDialogTitle>
+										<AlertDialogTitle>
+											Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?
+										</AlertDialogTitle>
 										<AlertDialogDescription>
-											This action cannot be undone. This will permanently delete the selected {selectedIds.size === 1 ? "document" : "documents"} from your search space.
+											This action cannot be undone. This will permanently delete the selected{" "}
+											{selectedIds.size === 1 ? "document" : "documents"} from your search space.
 										</AlertDialogDescription>
 									</AlertDialogHeader>
 								</div>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index fb0d72fae..d5ee00dfb 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -1,7 +1,20 @@
 "use client";
 
 import { formatDistanceToNow } from "date-fns";
-import { AlertCircle, Calendar, CheckCircle2, ChevronDown, ChevronUp, Clock, FileText, FileX, Loader2, Network, Plus, User } from "lucide-react";
+import {
+	AlertCircle,
+	Calendar,
+	CheckCircle2,
+	ChevronDown,
+	ChevronUp,
+	Clock,
+	FileText,
+	FileX,
+	Loader2,
+	Network,
+	Plus,
+	User,
+} from "lucide-react";
 import { motion } from "motion/react";
 import { useTranslations } from "next-intl";
 import React, { useRef, useState, useEffect, useCallback } from "react";
@@ -10,12 +23,7 @@ import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
 import { MarkdownViewer } from "@/components/markdown-viewer";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
-import {
-	Dialog,
-	DialogContent,
-	DialogHeader,
-	DialogTitle,
-} from "@/components/ui/dialog";
+import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
 import { Skeleton } from "@/components/ui/skeleton";
 import { Spinner } from "@/components/ui/spinner";
 import {
@@ -35,7 +43,7 @@ import type { ColumnVisibility, Document, DocumentStatus } from "./types";
 // Status indicator component for document processing status
 function StatusIndicator({ status }: { status?: DocumentStatus }) {
 	const state = status?.state ?? "ready";
-	
+
 	switch (state) {
 		case "pending":
 			return (
@@ -176,12 +184,10 @@ function SortableHeader({
 		>
 			{icon && <span className="opacity-60">{icon}</span>}
 			{children}
-			<span className={`transition-opacity ${isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50"}`}>
-				{isActive && sortDesc ? (
-					<ChevronDown size={14} />
-				) : (
-					<ChevronUp size={14} />
-				)}
+			<span
+				className={`transition-opacity ${isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50"}`}
+			>
+				{isActive && sortDesc ? <ChevronDown size={14} /> : <ChevronUp size={14} />}
 			</span>
 		</button>
 	);
@@ -300,8 +306,10 @@ export function DocumentsTableShell({
 
 	// Only consider selectable documents for "select all" logic
 	const selectableDocs = sorted.filter(isSelectable);
-	const allSelectedOnPage = selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id));
-	const someSelectedOnPage = selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;
+	const allSelectedOnPage =
+		selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id));
+	const someSelectedOnPage =
+		selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;
 
 	const toggleAll = (checked: boolean) => {
 		const next = new Set(selectedIds);
@@ -388,10 +396,7 @@ export function DocumentsTableShell({
 												</div>
 											</TableCell>
 											<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40">
-												<Skeleton
-													className="h-4"
-													style={{ width: `${widthPercent}%` }}
-												/>
+												<Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
 											</TableCell>
 											{columnVisibility.document_type && (
 												<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden">
@@ -429,24 +434,15 @@ export function DocumentsTableShell({
 								<div className="flex items-start gap-3">
 									<Skeleton className="h-4 w-4 mt-0.5 rounded" />
 									<div className="flex-1 min-w-0 space-y-2">
-										<Skeleton
-											className="h-4"
-											style={{ width: `${widthPercent}%` }}
-										/>
+										<Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
 										<div className="flex flex-wrap items-center gap-2">
 											<Skeleton className="h-5 w-20 rounded" />
-											{columnVisibility.created_by && (
-												<Skeleton className="h-3 w-14" />
-											)}
-											{columnVisibility.created_at && (
-												<Skeleton className="h-3 w-20" />
-											)}
+											{columnVisibility.created_by && <Skeleton className="h-3 w-14" />}
+											{columnVisibility.created_at && <Skeleton className="h-3 w-20" />}
 										</div>
 									</div>
 									<div className="flex items-center gap-2">
-										{columnVisibility.status && (
-											<Skeleton className="h-5 w-5 rounded-full" />
-										)}
+										{columnVisibility.status && <Skeleton className="h-5 w-5 rounded-full" />}
 										<Skeleton className="h-7 w-7 rounded" />
 									</div>
 								</div>
@@ -549,9 +545,7 @@ export function DocumentsTableShell({
 									)}
 									{columnVisibility.status && (
 										<TableHead className="w-20 text-center">
-											<span className="text-sm font-medium text-muted-foreground/70">
-												Status
-											</span>
+											<span className="text-sm font-medium text-muted-foreground/70">Status</span>
 										</TableHead>
 									)}
 									<TableHead className="w-10">
@@ -580,9 +574,7 @@ export function DocumentsTableShell({
 													},
 												}}
 												className={`border-b border-border/40 transition-colors ${
-													isSelected
-														? "bg-primary/5 hover:bg-primary/8"
-														: "hover:bg-muted/30"
+													isSelected ? "bg-primary/5 hover:bg-primary/8" : "hover:bg-muted/30"
 												}`}
 											>
 												<TableCell className="w-8 px-0 py-2.5 text-center">
@@ -591,7 +583,9 @@ export function DocumentsTableShell({
 															checked={isSelected}
 															onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)}
 															disabled={!canSelect}
-															aria-label={canSelect ? "Select row" : "Cannot select while processing"}
+															aria-label={
+																canSelect ? "Select row" : "Cannot select while processing"
+															}
 															className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`}
 														/>
 													</div>
@@ -639,7 +633,9 @@ export function DocumentsTableShell({
 													<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40">
 														<Tooltip>
 															<TooltipTrigger asChild>
-																<span className="cursor-default">{formatRelativeDate(doc.created_at)}</span>
+																<span className="cursor-default">
+																	{formatRelativeDate(doc.created_at)}
+																</span>
 															</TooltipTrigger>
 															<TooltipContent side="top">
 																{formatAbsoluteDate(doc.created_at)}
@@ -720,9 +716,7 @@ export function DocumentsTableShell({
 											<div className="flex flex-wrap items-center gap-2">
 												<DocumentTypeChip type={doc.document_type} />
 												{columnVisibility.created_by && doc.created_by_name && (
-													<span className="text-xs text-foreground">
-														{doc.created_by_name}
-													</span>
+													<span className="text-xs text-foreground">{doc.created_by_name}</span>
 												)}
 												{columnVisibility.created_at && (
 													<Tooltip>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
index 4f23693ad..ec355f576 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/RowActions.tsx
@@ -46,7 +46,8 @@ export function RowActions({
 	);
 
 	// Documents in "pending" or "processing" state should show disabled delete
-	const isBeingProcessed = document.status?.state === "pending" || document.status?.state === "processing";
+	const isBeingProcessed =
+		document.status?.state === "pending" || document.status?.state === "processing";
 
 	// SURFSENSE_DOCS are system-managed and should not show delete at all
 	const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes(
@@ -67,8 +68,9 @@ export function RowActions({
 		} catch (error: unknown) {
 			console.error("Error deleting document:", error);
 			// Check for 409 Conflict (document started processing after UI loaded)
-			const status = (error as { response?: { status?: number } })?.response?.status 
-				?? (error as { status?: number })?.status;
+			const status =
+				(error as { response?: { status?: number } })?.response?.status ??
+				(error as { status?: number })?.status;
 			if (status === 409) {
 				toast.error("Document is now being processed. Please try again later.");
 			} else {
@@ -92,7 +94,11 @@ export function RowActions({
 					// Editable documents: show 3-dot dropdown with edit + delete
 					<DropdownMenu>
 						<DropdownMenuTrigger asChild>
-							<Button variant="ghost" size="icon" className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80">
+							<Button
+								variant="ghost"
+								size="icon"
+								className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"
+							>
 								<MoreHorizontal className="h-4 w-4" />
 								<span className="sr-only">Open menu</span>
 							</Button>
@@ -101,7 +107,9 @@ export function RowActions({
 							<DropdownMenuItem
 								onClick={() => !isEditDisabled && handleEdit()}
 								disabled={isEditDisabled}
-								className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""}
+								className={
+									isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
+								}
 							>
 								<Pencil className="mr-2 h-4 w-4" />
 								<span>Edit</span>
@@ -110,7 +118,11 @@ export function RowActions({
 								<DropdownMenuItem
 									onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
 									disabled={isDeleteDisabled}
-									className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"}
+									className={
+										isDeleteDisabled
+											? "text-muted-foreground cursor-not-allowed opacity-50"
+											: "text-destructive focus:text-destructive"
+									}
 								>
 									<Trash2 className="mr-2 h-4 w-4" />
 									<span>Delete</span>
@@ -150,7 +162,9 @@ export function RowActions({
 							<DropdownMenuItem
 								onClick={() => !isEditDisabled && handleEdit()}
 								disabled={isEditDisabled}
-								className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""}
+								className={
+									isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
+								}
 							>
 								<Pencil className="mr-2 h-4 w-4" />
 								<span>Edit</span>
@@ -159,7 +173,11 @@ export function RowActions({
 								<DropdownMenuItem
 									onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
 									disabled={isDeleteDisabled}
-									className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"}
+									className={
+										isDeleteDisabled
+											? "text-muted-foreground cursor-not-allowed opacity-50"
+											: "text-destructive focus:text-destructive"
+									}
 								>
 									<Trash2 className="mr-2 h-4 w-4" />
 									<span>Delete</span>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
index b85b334d7..8cf2fe8da 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@@ -116,13 +116,15 @@ export default function DocumentsTable() {
 				created_by_id: item.created_by_id ?? null,
 				created_by_name: item.created_by_name ?? null,
 				created_at: item.created_at,
-				status: (item as { status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string } }).status ?? { state: "ready" as const },
+				status: (
+					item as {
+						status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string };
+					}
+				).status ?? { state: "ready" as const },
 			}))
 		: paginatedRealtimeDocuments;
 
-	const displayTotal = isSearchMode
-		? searchResponse?.total || 0
-		: sortedRealtimeDocuments.length;
+	const displayTotal = isSearchMode ? searchResponse?.total || 0 : sortedRealtimeDocuments.length;
 
 	const loading = isSearchMode ? isSearchLoading : realtimeLoading;
 	const error = isSearchMode ? searchError : realtimeError;
@@ -149,13 +151,13 @@ export default function DocumentsTable() {
 		// Filter out pending/processing documents - they cannot be deleted
 		// For real-time mode, use sortedRealtimeDocuments (which has status)
 		// For search mode, use searchResponse items (need to safely access status)
-		const allDocs = isSearchMode 
-			? (searchResponse?.items || []).map(item => ({
-				id: item.id,
-				status: (item as { status?: { state: string } }).status,
-			}))
-			: sortedRealtimeDocuments.map(doc => ({ id: doc.id, status: doc.status }));
-		
+		const allDocs = isSearchMode
+			? (searchResponse?.items || []).map((item) => ({
+					id: item.id,
+					status: (item as { status?: { state: string } }).status,
+				}))
+			: sortedRealtimeDocuments.map((doc) => ({ id: doc.id, status: doc.status }));
+
 		const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id));
 		const deletableIds = selectedDocs
 			.filter((doc) => doc.status?.state !== "pending" && doc.status?.state !== "processing")
@@ -163,7 +165,9 @@ export default function DocumentsTable() {
 		const inProgressCount = selectedIds.size - deletableIds.length;
 
 		if (inProgressCount > 0) {
-			toast.warning(`${inProgressCount} document(s) are pending or processing and cannot be deleted.`);
+			toast.warning(
+				`${inProgressCount} document(s) are pending or processing and cannot be deleted.`
+			);
 		}
 
 		if (deletableIds.length === 0) {
@@ -180,8 +184,9 @@ export default function DocumentsTable() {
 						await deleteDocumentMutation({ id });
 						return true;
 					} catch (error: unknown) {
-						const status = (error as { response?: { status?: number } })?.response?.status 
-							?? (error as { status?: number })?.status;
+						const status =
+							(error as { response?: { status?: number } })?.response?.status ??
+							(error as { status?: number })?.status;
 						if (status === 409) conflictCount++;
 						return false;
 					}
@@ -195,13 +200,13 @@ export default function DocumentsTable() {
 			} else {
 				toast.error(t("delete_partial_failed"));
 			}
-			
+
 			// If in search mode, refetch search results to reflect deletion
 			if (isSearchMode) {
 				await refetchSearch();
 			}
 			// Real-time mode: Electric will sync the deletion automatically
-			
+
 			setSelectedIds(new Set());
 		} catch (e) {
 			console.error(e);
@@ -210,21 +215,24 @@ export default function DocumentsTable() {
 	};
 
 	// Single document delete handler for RowActions
-	const handleDeleteDocument = useCallback(async (id: number): Promise<boolean> => {
-		try {
-			await deleteDocumentMutation({ id });
-			toast.success(t("delete_success") || "Document deleted");
-			// If in search mode, refetch search results to reflect deletion
-			if (isSearchMode) {
-				await refetchSearch();
+	const handleDeleteDocument = useCallback(
+		async (id: number): Promise<boolean> => {
+			try {
+				await deleteDocumentMutation({ id });
+				toast.success(t("delete_success") || "Document deleted");
+				// If in search mode, refetch search results to reflect deletion
+				if (isSearchMode) {
+					await refetchSearch();
+				}
+				// Real-time mode: Electric will sync the deletion automatically
+				return true;
+			} catch (e) {
+				console.error("Error deleting document:", e);
+				return false;
 			}
-			// Real-time mode: Electric will sync the deletion automatically
-			return true;
-		} catch (e) {
-			console.error("Error deleting document:", e);
-			return false;
-		}
-	}, [deleteDocumentMutation, isSearchMode, refetchSearch, t]);
+		},
+		[deleteDocumentMutation, isSearchMode, refetchSearch, t]
+	);
 
 	const handleSortChange = useCallback((key: SortKey) => {
 		setSortKey((currentKey) => {
diff --git a/surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts b/surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts
index 38205a8d2..cbdf17244 100644
--- a/surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts
+++ b/surfsense_web/atoms/connector-dialog/connector-dialog.atoms.ts
@@ -2,4 +2,3 @@ import { atom } from "jotai";
 
 // Atom to control the connector dialog open state from anywhere in the app
 export const connectorDialogOpenAtom = atom(false);
-
diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx
index ec8399198..e597770ee 100644
--- a/surfsense_web/components/assistant-ui/connector-popup.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup.tsx
@@ -191,7 +191,9 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
 			{!hideTrigger && (
 				<TooltipIconButton
 					data-joyride="connector-icon"
-					tooltip={hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"}
+					tooltip={
+						hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"
+					}
 					side="bottom"
 					className={cn(
 						"size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative",
diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
index 0ab333457..69c1b797d 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts
@@ -346,13 +346,13 @@ export const useConnectorDialog = () => {
 						const connectorId = parseInt(params.connectorId, 10);
 						newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId);
 
-					// If we found the connector, find the matching OAuth/Composio connector by type
-					if (newConnector) {
-						const connectorType = newConnector.connector_type;
-						oauthConnector =
-							OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) ||
-							COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType);
-					}
+						// If we found the connector, find the matching OAuth/Composio connector by type
+						if (newConnector) {
+							const connectorType = newConnector.connector_type;
+							oauthConnector =
+								OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) ||
+								COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType);
+						}
 					}
 
 					// If we don't have a connector yet, try to find by connector param
@@ -361,12 +361,12 @@ export const useConnectorDialog = () => {
 							OAUTH_CONNECTORS.find((c) => c.id === params.connector) ||
 							COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);
 
-					if (oauthConnector) {
-						const oauthConnectorType = oauthConnector.connectorType;
-						newConnector = result.data.find(
-							(c: SearchSourceConnector) => c.connector_type === oauthConnectorType
-						);
-					}
+						if (oauthConnector) {
+							const oauthConnectorType = oauthConnector.connectorType;
+							newConnector = result.data.find(
+								(c: SearchSourceConnector) => c.connector_type === oauthConnectorType
+							);
+						}
 					}
 
 					if (newConnector && oauthConnector) {
@@ -679,11 +679,11 @@ export const useConnectorDialog = () => {
 									},
 								});
 
-							const successMessage =
-								currentConnectorType === "MCP_CONNECTOR"
-									? `${connector.name} added successfully`
-									: `${connectorTitle} connected and syncing started!`;
-							toast.success(successMessage);
+								const successMessage =
+									currentConnectorType === "MCP_CONNECTOR"
+										? `${connector.name} added successfully`
+										: `${connectorTitle} connected and syncing started!`;
+								toast.success(successMessage);
 
 								const url = new URL(window.location.href);
 								url.searchParams.delete("modal");
diff --git a/surfsense_web/components/theme/theme-toggle.tsx b/surfsense_web/components/theme/theme-toggle.tsx
index 382d11087..b9b23656b 100644
--- a/surfsense_web/components/theme/theme-toggle.tsx
+++ b/surfsense_web/components/theme/theme-toggle.tsx
@@ -8,172 +8,167 @@ import { cn } from "@/lib/utils";
 
 // ///////////////////////////////////////////////////////////////////////////
 // Types
-export type AnimationVariant =
-  | "circle"
-  | "rectangle"
-  | "gif"
-  | "polygon"
-  | "circle-blur";
+export type AnimationVariant = "circle" | "rectangle" | "gif" | "polygon" | "circle-blur";
 export type AnimationStart =
-  | "top-left"
-  | "top-right"
-  | "bottom-left"
-  | "bottom-right"
-  | "center"
-  | "top-center"
-  | "bottom-center"
-  | "bottom-up"
-  | "top-down"
-  | "left-right"
-  | "right-left";
+	| "top-left"
+	| "top-right"
+	| "bottom-left"
+	| "bottom-right"
+	| "center"
+	| "top-center"
+	| "bottom-center"
+	| "bottom-up"
+	| "top-down"
+	| "left-right"
+	| "right-left";
 
 interface Animation {
-  name: string;
-  css: string;
+	name: string;
+	css: string;
 }
 
 // ///////////////////////////////////////////////////////////////////////////
 // Helper functions
 
 const getPositionCoords = (position: AnimationStart) => {
-  switch (position) {
-    case "top-left":
-      return { cx: "0", cy: "0" };
-    case "top-right":
-      return { cx: "40", cy: "0" };
-    case "bottom-left":
-      return { cx: "0", cy: "40" };
-    case "bottom-right":
-      return { cx: "40", cy: "40" };
-    case "top-center":
-      return { cx: "20", cy: "0" };
-    case "bottom-center":
-      return { cx: "20", cy: "40" };
-    case "bottom-up":
-    case "top-down":
-    case "left-right":
-    case "right-left":
-      return { cx: "20", cy: "20" };
-  }
+	switch (position) {
+		case "top-left":
+			return { cx: "0", cy: "0" };
+		case "top-right":
+			return { cx: "40", cy: "0" };
+		case "bottom-left":
+			return { cx: "0", cy: "40" };
+		case "bottom-right":
+			return { cx: "40", cy: "40" };
+		case "top-center":
+			return { cx: "20", cy: "0" };
+		case "bottom-center":
+			return { cx: "20", cy: "40" };
+		case "bottom-up":
+		case "top-down":
+		case "left-right":
+		case "right-left":
+			return { cx: "20", cy: "20" };
+	}
 };
 
 const generateSVG = (variant: AnimationVariant, start: AnimationStart) => {
-  if (variant === "circle-blur") {
-    if (start === "center") {
-      return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="20" cy="20" r="18" fill="white" filter="url(%23blur)"/></svg>`;
-    }
-    const positionCoords = getPositionCoords(start);
-    if (!positionCoords) {
-      throw new Error(`Invalid start position: ${start}`);
-    }
-    const { cx, cy } = positionCoords;
-    return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="${cx}" cy="${cy}" r="18" fill="white" filter="url(%23blur)"/></svg>`;
-  }
+	if (variant === "circle-blur") {
+		if (start === "center") {
+			return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="20" cy="20" r="18" fill="white" filter="url(%23blur)"/></svg>`;
+		}
+		const positionCoords = getPositionCoords(start);
+		if (!positionCoords) {
+			throw new Error(`Invalid start position: ${start}`);
+		}
+		const { cx, cy } = positionCoords;
+		return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="${cx}" cy="${cy}" r="18" fill="white" filter="url(%23blur)"/></svg>`;
+	}
 
-  if (start === "center") return;
+	if (start === "center") return;
 
-  if (variant === "rectangle") return "";
+	if (variant === "rectangle") return "";
 
-  const positionCoords = getPositionCoords(start);
-  if (!positionCoords) {
-    throw new Error(`Invalid start position: ${start}`);
-  }
-  const { cx, cy } = positionCoords;
+	const positionCoords = getPositionCoords(start);
+	if (!positionCoords) {
+		throw new Error(`Invalid start position: ${start}`);
+	}
+	const { cx, cy } = positionCoords;
 
-  if (variant === "circle") {
-    return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><circle cx="${cx}" cy="${cy}" r="20" fill="white"/></svg>`;
-  }
+	if (variant === "circle") {
+		return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><circle cx="${cx}" cy="${cy}" r="20" fill="white"/></svg>`;
+	}
 
-  return "";
+	return "";
 };
 
 const getTransformOrigin = (start: AnimationStart) => {
-  switch (start) {
-    case "top-left":
-      return "top left";
-    case "top-right":
-      return "top right";
-    case "bottom-left":
-      return "bottom left";
-    case "bottom-right":
-      return "bottom right";
-    case "top-center":
-      return "top center";
-    case "bottom-center":
-      return "bottom center";
-    case "bottom-up":
-    case "top-down":
-    case "left-right":
-    case "right-left":
-      return "center";
-  }
+	switch (start) {
+		case "top-left":
+			return "top left";
+		case "top-right":
+			return "top right";
+		case "bottom-left":
+			return "bottom left";
+		case "bottom-right":
+			return "bottom right";
+		case "top-center":
+			return "top center";
+		case "bottom-center":
+			return "bottom center";
+		case "bottom-up":
+		case "top-down":
+		case "left-right":
+		case "right-left":
+			return "center";
+	}
 };
 
 export const createAnimation = (
-  variant: AnimationVariant,
-  start: AnimationStart = "center",
-  blur = false,
-  url?: string,
+	variant: AnimationVariant,
+	start: AnimationStart = "center",
+	blur = false,
+	url?: string
 ): Animation => {
-  const svg = generateSVG(variant, start);
-  const transformOrigin = getTransformOrigin(start);
+	const svg = generateSVG(variant, start);
+	const transformOrigin = getTransformOrigin(start);
 
-  if (variant === "rectangle") {
-    const getClipPath = (direction: AnimationStart) => {
-      switch (direction) {
-        case "bottom-up":
-          return {
-            from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
-            to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
-          };
-        case "top-down":
-          return {
-            from: "polygon(0% 0%, 100% 0%, 100% 0%, 0% 0%)",
-            to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
-          };
-        case "left-right":
-          return {
-            from: "polygon(0% 0%, 0% 0%, 0% 100%, 0% 100%)",
-            to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
-          };
-        case "right-left":
-          return {
-            from: "polygon(100% 0%, 100% 0%, 100% 100%, 100% 100%)",
-            to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
-          };
-        case "top-left":
-          return {
-            from: "polygon(0% 0%, 0% 0%, 0% 0%, 0% 0%)",
-            to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
-          };
-        case "top-right":
-          return {
-            from: "polygon(100% 0%, 100% 0%, 100% 0%, 100% 0%)",
-            to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
-          };
-        case "bottom-left":
-          return {
-            from: "polygon(0% 100%, 0% 100%, 0% 100%, 0% 100%)",
-            to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
-          };
-        case "bottom-right":
-          return {
-            from: "polygon(100% 100%, 100% 100%, 100% 100%, 100% 100%)",
-            to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
-          };
-        default:
-          return {
-            from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
-            to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
-          };
-      }
-    };
+	if (variant === "rectangle") {
+		const getClipPath = (direction: AnimationStart) => {
+			switch (direction) {
+				case "bottom-up":
+					return {
+						from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
+						to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
+					};
+				case "top-down":
+					return {
+						from: "polygon(0% 0%, 100% 0%, 100% 0%, 0% 0%)",
+						to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
+					};
+				case "left-right":
+					return {
+						from: "polygon(0% 0%, 0% 0%, 0% 100%, 0% 100%)",
+						to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
+					};
+				case "right-left":
+					return {
+						from: "polygon(100% 0%, 100% 0%, 100% 100%, 100% 100%)",
+						to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
+					};
+				case "top-left":
+					return {
+						from: "polygon(0% 0%, 0% 0%, 0% 0%, 0% 0%)",
+						to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
+					};
+				case "top-right":
+					return {
+						from: "polygon(100% 0%, 100% 0%, 100% 0%, 100% 0%)",
+						to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
+					};
+				case "bottom-left":
+					return {
+						from: "polygon(0% 100%, 0% 100%, 0% 100%, 0% 100%)",
+						to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
+					};
+				case "bottom-right":
+					return {
+						from: "polygon(100% 100%, 100% 100%, 100% 100%, 100% 100%)",
+						to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
+					};
+				default:
+					return {
+						from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
+						to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
+					};
+			}
+		};
 
-    const clipPath = getClipPath(start);
+		const clipPath = getClipPath(start);
 
-    return {
-      name: `${variant}-${start}${blur ? "-blur" : ""}`,
-      css: `
+		return {
+			name: `${variant}-${start}${blur ? "-blur" : ""}`,
+			css: `
        ::view-transition-group(root) {
         animation-duration: 0.7s;
         animation-timing-function: var(--expo-out);
@@ -218,12 +213,12 @@ export const createAnimation = (
         }
       }
       `,
-    };
-  }
-  if (variant === "circle" && start == "center") {
-    return {
-      name: `${variant}-${start}${blur ? "-blur" : ""}`,
-      css: `
+		};
+	}
+	if (variant === "circle" && start == "center") {
+		return {
+			name: `${variant}-${start}${blur ? "-blur" : ""}`,
+			css: `
        ::view-transition-group(root) {
         animation-duration: 0.7s;
         animation-timing-function: var(--expo-out);
@@ -268,12 +263,12 @@ export const createAnimation = (
         }
       }
       `,
-    };
-  }
-  if (variant === "gif") {
-    return {
-      name: `${variant}-${start}`,
-      css: `
+		};
+	}
+	if (variant === "gif") {
+		return {
+			name: `${variant}-${start}`,
+			css: `
       ::view-transition-group(root) {
   animation-timing-function: var(--expo-in);
 }
@@ -302,14 +297,14 @@ export const createAnimation = (
     mask-size: 2000vmax;
   }
 }`,
-    };
-  }
+		};
+	}
 
-  if (variant === "circle-blur") {
-    if (start === "center") {
-      return {
-        name: `${variant}-${start}`,
-        css: `
+	if (variant === "circle-blur") {
+		if (start === "center") {
+			return {
+				name: `${variant}-${start}`,
+				css: `
         ::view-transition-group(root) {
           animation-timing-function: var(--expo-out);
         }
@@ -334,12 +329,12 @@ export const createAnimation = (
           }
         }
         `,
-      };
-    }
+			};
+		}
 
-    return {
-      name: `${variant}-${start}`,
-      css: `
+		return {
+			name: `${variant}-${start}`,
+			css: `
       ::view-transition-group(root) {
         animation-timing-function: var(--expo-out);
       }
@@ -364,41 +359,41 @@ export const createAnimation = (
         }
       }
       `,
-    };
-  }
+		};
+	}
 
-  if (variant === "polygon") {
-    const getPolygonClipPaths = (position: AnimationStart) => {
-      switch (position) {
-        case "top-left":
-          return {
-            darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
-            darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
-            lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
-            lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
-          };
-        case "top-right":
-          return {
-            darkFrom: "polygon(150% -71%, 250% 71%, 250% 71%, 150% -71%)",
-            darkTo: "polygon(150% -71%, 250% 71%, 50% 171%, -71% 50%)",
-            lightFrom: "polygon(-71% 50%, 50% 171%, 50% 171%, -71% 50%)",
-            lightTo: "polygon(-71% 50%, 50% 171%, 250% 71%, 150% -71%)",
-          };
-        default:
-          return {
-            darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
-            darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
-            lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
-            lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
-          };
-      }
-    };
+	if (variant === "polygon") {
+		const getPolygonClipPaths = (position: AnimationStart) => {
+			switch (position) {
+				case "top-left":
+					return {
+						darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
+						darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
+						lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
+						lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
+					};
+				case "top-right":
+					return {
+						darkFrom: "polygon(150% -71%, 250% 71%, 250% 71%, 150% -71%)",
+						darkTo: "polygon(150% -71%, 250% 71%, 50% 171%, -71% 50%)",
+						lightFrom: "polygon(-71% 50%, 50% 171%, 50% 171%, -71% 50%)",
+						lightTo: "polygon(-71% 50%, 50% 171%, 250% 71%, 150% -71%)",
+					};
+				default:
+					return {
+						darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
+						darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
+						lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
+						lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
+					};
+			}
+		};
 
-    const clipPaths = getPolygonClipPaths(start);
+		const clipPaths = getPolygonClipPaths(start);
 
-    return {
-      name: `${variant}-${start}${blur ? "-blur" : ""}`,
-      css: `
+		return {
+			name: `${variant}-${start}${blur ? "-blur" : ""}`,
+			css: `
       ::view-transition-group(root) {
         animation-duration: 0.7s;
         animation-timing-function: var(--expo-out);
@@ -443,35 +438,35 @@ export const createAnimation = (
         }
       }
       `,
-    };
-  }
+		};
+	}
 
-  // Handle circle variants with start positions using clip-path
-  if (variant === "circle" && start !== "center") {
-    const getClipPathPosition = (position: AnimationStart) => {
-      switch (position) {
-        case "top-left":
-          return "0% 0%";
-        case "top-right":
-          return "100% 0%";
-        case "bottom-left":
-          return "0% 100%";
-        case "bottom-right":
-          return "100% 100%";
-        case "top-center":
-          return "50% 0%";
-        case "bottom-center":
-          return "50% 100%";
-        default:
-          return "50% 50%";
-      }
-    };
+	// Handle circle variants with start positions using clip-path
+	if (variant === "circle" && start !== "center") {
+		const getClipPathPosition = (position: AnimationStart) => {
+			switch (position) {
+				case "top-left":
+					return "0% 0%";
+				case "top-right":
+					return "100% 0%";
+				case "bottom-left":
+					return "0% 100%";
+				case "bottom-right":
+					return "100% 100%";
+				case "top-center":
+					return "50% 0%";
+				case "bottom-center":
+					return "50% 100%";
+				default:
+					return "50% 50%";
+			}
+		};
 
-    const clipPosition = getClipPathPosition(start);
+		const clipPosition = getClipPathPosition(start);
 
-    return {
-      name: `${variant}-${start}${blur ? "-blur" : ""}`,
-      css: `
+		return {
+			name: `${variant}-${start}${blur ? "-blur" : ""}`,
+			css: `
        ::view-transition-group(root) {
         animation-duration: 1s;
         animation-timing-function: var(--expo-out);
@@ -516,12 +511,12 @@ export const createAnimation = (
         }
       }
       `,
-    };
-  }
+		};
+	}
 
-  return {
-    name: `${variant}-${start}${blur ? "-blur" : ""}`,
-    css: `
+	return {
+		name: `${variant}-${start}${blur ? "-blur" : ""}`,
+		css: `
       ::view-transition-group(root) {
         animation-timing-function: var(--expo-in);
       }
@@ -549,237 +544,229 @@ export const createAnimation = (
         }
       }
     `,
-  };
+	};
 };
 
 // ///////////////////////////////////////////////////////////////////////////
 // Custom hook for theme toggle functionality
 export const useThemeToggle = ({
-  variant = "circle",
-  start = "center",
-  blur = false,
-  gifUrl = "",
+	variant = "circle",
+	start = "center",
+	blur = false,
+	gifUrl = "",
 }: {
-  variant?: AnimationVariant;
-  start?: AnimationStart;
-  blur?: boolean;
-  gifUrl?: string;
+	variant?: AnimationVariant;
+	start?: AnimationStart;
+	blur?: boolean;
+	gifUrl?: string;
 } = {}) => {
-  const { theme, setTheme, resolvedTheme } = useTheme();
+	const { theme, setTheme, resolvedTheme } = useTheme();
 
-  const [isDark, setIsDark] = useState(false);
+	const [isDark, setIsDark] = useState(false);
 
-  // Sync isDark state with resolved theme after hydration
-  useEffect(() => {
-    setIsDark(resolvedTheme === "dark");
-  }, [resolvedTheme]);
+	// Sync isDark state with resolved theme after hydration
+	useEffect(() => {
+		setIsDark(resolvedTheme === "dark");
+	}, [resolvedTheme]);
 
-  const styleId = "theme-transition-styles";
+	const styleId = "theme-transition-styles";
 
-  const updateStyles = useCallback((css: string) => {
-    if (typeof window === "undefined") return;
+	const updateStyles = useCallback((css: string) => {
+		if (typeof window === "undefined") return;
 
-    let styleElement = document.getElementById(styleId) as HTMLStyleElement;
+		let styleElement = document.getElementById(styleId) as HTMLStyleElement;
 
-    if (!styleElement) {
-      styleElement = document.createElement("style");
-      styleElement.id = styleId;
-      document.head.appendChild(styleElement);
-    }
+		if (!styleElement) {
+			styleElement = document.createElement("style");
+			styleElement.id = styleId;
+			document.head.appendChild(styleElement);
+		}
 
-    styleElement.textContent = css;
-  }, []);
+		styleElement.textContent = css;
+	}, []);
 
-  const toggleTheme = useCallback(() => {
-    setIsDark(!isDark);
+	const toggleTheme = useCallback(() => {
+		setIsDark(!isDark);
 
-    const animation = createAnimation(variant, start, blur, gifUrl);
+		const animation = createAnimation(variant, start, blur, gifUrl);
 
-    updateStyles(animation.css);
+		updateStyles(animation.css);
 
-    if (typeof window === "undefined") return;
+		if (typeof window === "undefined") return;
 
-    const switchTheme = () => {
-      setTheme(theme === "light" ? "dark" : "light");
-    };
+		const switchTheme = () => {
+			setTheme(theme === "light" ? "dark" : "light");
+		};
 
-    if (!document.startViewTransition) {
-      switchTheme();
-      return;
-    }
+		if (!document.startViewTransition) {
+			switchTheme();
+			return;
+		}
 
-    document.startViewTransition(switchTheme);
-  }, [theme, setTheme, variant, start, blur, gifUrl, updateStyles, isDark]);
+		document.startViewTransition(switchTheme);
+	}, [theme, setTheme, variant, start, blur, gifUrl, updateStyles, isDark]);
 
-  const setCrazyLightTheme = useCallback(() => {
-    setIsDark(false);
+	const setCrazyLightTheme = useCallback(() => {
+		setIsDark(false);
 
-    const animation = createAnimation(variant, start, blur, gifUrl);
+		const animation = createAnimation(variant, start, blur, gifUrl);
 
-    updateStyles(animation.css);
+		updateStyles(animation.css);
 
-    if (typeof window === "undefined") return;
+		if (typeof window === "undefined") return;
 
-    const switchTheme = () => {
-      setTheme("light");
-    };
+		const switchTheme = () => {
+			setTheme("light");
+		};
 
-    if (!document.startViewTransition) {
-      switchTheme();
-      return;
-    }
+		if (!document.startViewTransition) {
+			switchTheme();
+			return;
+		}
 
-    document.startViewTransition(switchTheme);
-  }, [setTheme, variant, start, blur, gifUrl, updateStyles]);
+		document.startViewTransition(switchTheme);
+	}, [setTheme, variant, start, blur, gifUrl, updateStyles]);
 
-  const setCrazyDarkTheme = useCallback(() => {
-    setIsDark(true);
+	const setCrazyDarkTheme = useCallback(() => {
+		setIsDark(true);
 
-    const animation = createAnimation(variant, start, blur, gifUrl);
+		const animation = createAnimation(variant, start, blur, gifUrl);
 
-    updateStyles(animation.css);
+		updateStyles(animation.css);
 
-    if (typeof window === "undefined") return;
+		if (typeof window === "undefined") return;
 
-    const switchTheme = () => {
-      setTheme("dark");
-    };
+		const switchTheme = () => {
+			setTheme("dark");
+		};
 
-    if (!document.startViewTransition) {
-      switchTheme();
-      return;
-    }
+		if (!document.startViewTransition) {
+			switchTheme();
+			return;
+		}
 
-    document.startViewTransition(switchTheme);
-  }, [setTheme, variant, start, blur, gifUrl, updateStyles]);
+		document.startViewTransition(switchTheme);
+	}, [setTheme, variant, start, blur, gifUrl, updateStyles]);
 
-  const setCrazySystemTheme = useCallback(() => {
-    if (typeof window === "undefined") return;
+	const setCrazySystemTheme = useCallback(() => {
+		if (typeof window === "undefined") return;
 
-    const prefersDark = window.matchMedia(
-      "(prefers-color-scheme: dark)",
-    ).matches;
-    setIsDark(prefersDark);
+		const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
+		setIsDark(prefersDark);
 
-    const animation = createAnimation(variant, start, blur, gifUrl);
+		const animation = createAnimation(variant, start, blur, gifUrl);
 
-    updateStyles(animation.css);
+		updateStyles(animation.css);
 
-    const switchTheme = () => {
-      setTheme("system");
-    };
+		const switchTheme = () => {
+			setTheme("system");
+		};
 
-    if (!document.startViewTransition) {
-      switchTheme();
-      return;
-    }
+		if (!document.startViewTransition) {
+			switchTheme();
+			return;
+		}
 
-    document.startViewTransition(switchTheme);
-  }, [setTheme, variant, start, blur, gifUrl, updateStyles]);
+		document.startViewTransition(switchTheme);
+	}, [setTheme, variant, start, blur, gifUrl, updateStyles]);
 
-  return {
-    isDark,
-    setIsDark,
-    toggleTheme,
-    setCrazyLightTheme,
-    setCrazyDarkTheme,
-    setCrazySystemTheme,
-  };
+	return {
+		isDark,
+		setIsDark,
+		toggleTheme,
+		setCrazyLightTheme,
+		setCrazyDarkTheme,
+		setCrazySystemTheme,
+	};
 };
 
 // ///////////////////////////////////////////////////////////////////////////
 // Theme Toggle Button Component (Sun/Moon Style)
 
 export const ThemeToggleButton = ({
-  className = "",
-  variant = "circle",
-  start = "center",
-  blur = false,
-  gifUrl = "",
+	className = "",
+	variant = "circle",
+	start = "center",
+	blur = false,
+	gifUrl = "",
 }: {
-  className?: string;
-  variant?: AnimationVariant;
-  start?: AnimationStart;
-  blur?: boolean;
-  gifUrl?: string;
+	className?: string;
+	variant?: AnimationVariant;
+	start?: AnimationStart;
+	blur?: boolean;
+	gifUrl?: string;
 }) => {
-  const { isDark, toggleTheme } = useThemeToggle({
-    variant,
-    start,
-    blur,
-    gifUrl,
-  });
-  const clipId = useId();
-  const clipPathId = `theme-toggle-clip-${clipId}`;
+	const { isDark, toggleTheme } = useThemeToggle({
+		variant,
+		start,
+		blur,
+		gifUrl,
+	});
+	const clipId = useId();
+	const clipPathId = `theme-toggle-clip-${clipId}`;
 
-  return (
-    <button
-      type="button"
-      className={cn(
-        "size-10 cursor-pointer rounded-full p-2 transition-all duration-300 active:scale-95 bg-transparent",
-        isDark ? "text-white" : "text-black",
-        className,
-      )}
-      onClick={toggleTheme}
-      aria-label="Toggle theme"
-    >
-      <span className="sr-only">Toggle theme</span>
-      <svg
-        xmlns="http://www.w3.org/2000/svg"
-        aria-hidden="true"
-        fill="currentColor"
-        strokeLinecap="round"
-        viewBox="0 0 32 32"
-      >
-        <clipPath id={clipPathId}>
-          <motion.path
-            animate={{ y: isDark ? 10 : 0, x: isDark ? -12 : 0 }}
-            transition={{ ease: "easeInOut", duration: 0.35 }}
-            d="M0-5h30a1 1 0 0 0 9 13v24H0Z"
-          />
-        </clipPath>
-        <g clipPath={`url(#${clipPathId})`}>
-          <motion.circle
-            animate={{ r: isDark ? 10 : 8 }}
-            transition={{ ease: "easeInOut", duration: 0.35 }}
-            cx="16"
-            cy="16"
-          />
-          <motion.g
-            animate={{
-              rotate: isDark ? -100 : 0,
-              scale: isDark ? 0.5 : 1,
-              opacity: isDark ? 0 : 1,
-            }}
-            transition={{ ease: "easeInOut", duration: 0.35 }}
-            stroke="currentColor"
-            strokeWidth="1.5"
-          >
-            <path d="M16 5.5v-4" />
-            <path d="M16 30.5v-4" />
-            <path d="M1.5 16h4" />
-            <path d="M26.5 16h4" />
-            <path d="m23.4 8.6 2.8-2.8" />
-            <path d="m5.7 26.3 2.9-2.9" />
-            <path d="m5.8 5.8 2.8 2.8" />
-            <path d="m23.4 23.4 2.9 2.9" />
-          </motion.g>
-        </g>
-      </svg>
-    </button>
-  );
+	return (
+		<button
+			type="button"
+			className={cn(
+				"size-10 cursor-pointer rounded-full p-2 transition-all duration-300 active:scale-95 bg-transparent",
+				isDark ? "text-white" : "text-black",
+				className
+			)}
+			onClick={toggleTheme}
+			aria-label="Toggle theme"
+		>
+			<span className="sr-only">Toggle theme</span>
+			<svg
+				xmlns="http://www.w3.org/2000/svg"
+				aria-hidden="true"
+				fill="currentColor"
+				strokeLinecap="round"
+				viewBox="0 0 32 32"
+			>
+				<clipPath id={clipPathId}>
+					<motion.path
+						animate={{ y: isDark ? 10 : 0, x: isDark ? -12 : 0 }}
+						transition={{ ease: "easeInOut", duration: 0.35 }}
+						d="M0-5h30a1 1 0 0 0 9 13v24H0Z"
+					/>
+				</clipPath>
+				<g clipPath={`url(#${clipPathId})`}>
+					<motion.circle
+						animate={{ r: isDark ? 10 : 8 }}
+						transition={{ ease: "easeInOut", duration: 0.35 }}
+						cx="16"
+						cy="16"
+					/>
+					<motion.g
+						animate={{
+							rotate: isDark ? -100 : 0,
+							scale: isDark ? 0.5 : 1,
+							opacity: isDark ? 0 : 1,
+						}}
+						transition={{ ease: "easeInOut", duration: 0.35 }}
+						stroke="currentColor"
+						strokeWidth="1.5"
+					>
+						<path d="M16 5.5v-4" />
+						<path d="M16 30.5v-4" />
+						<path d="M1.5 16h4" />
+						<path d="M26.5 16h4" />
+						<path d="m23.4 8.6 2.8-2.8" />
+						<path d="m5.7 26.3 2.9-2.9" />
+						<path d="m5.8 5.8 2.8 2.8" />
+						<path d="m23.4 23.4 2.9 2.9" />
+					</motion.g>
+				</g>
+			</svg>
+		</button>
+	);
 };
 
 // ///////////////////////////////////////////////////////////////////////////
 // Backwards compatible export (alias for ThemeToggleButton with default settings)
 export function ThemeTogglerComponent() {
-  return (
-    <ThemeToggleButton
-      variant="circle"
-      start="top-right"
-      className="size-8"
-    />
-  );
+	return <ThemeToggleButton variant="circle" start="top-right" className="size-8" />;
 }
 
 /**
diff --git a/surfsense_web/hooks/use-documents.ts b/surfsense_web/hooks/use-documents.ts
index 442c836b2..369cc7b41 100644
--- a/surfsense_web/hooks/use-documents.ts
+++ b/surfsense_web/hooks/use-documents.ts
@@ -144,7 +144,7 @@ export function useDocuments(
 		(doc: DocumentElectric): DocumentDisplay => ({
 			...doc,
 			created_by_name: doc.created_by_id
-				? userCacheRef.current.get(doc.created_by_id) ?? null
+				? (userCacheRef.current.get(doc.created_by_id) ?? null)
 				: null,
 			status: doc.status ?? { state: "ready" },
 		}),
@@ -232,7 +232,15 @@ export function useDocuments(
 				const handle = await client.syncShape({
 					table: "documents",
 					where: `search_space_id = ${spaceId}`,
-					columns: ["id", "document_type", "search_space_id", "title", "created_by_id", "created_at", "status"],
+					columns: [
+						"id",
+						"document_type",
+						"search_space_id",
+						"title",
+						"created_by_id",
+						"created_at",
+						"status",
+					],
 					primaryKey: ["id"],
 				});
 
@@ -258,7 +266,10 @@ export function useDocuments(
 				// Set up live query
 				const db = client.db as {
 					live?: {
-						query: <T>(sql: string, params?: (number | string)[]) => Promise<{
+						query: <T>(
+							sql: string,
+							params?: (number | string)[]
+						) => Promise<{
 							subscribe: (cb: (result: { rows: T[] }) => void) => void;
 							unsubscribe?: () => void;
 						}>;
@@ -297,8 +308,7 @@ export function useDocuments(
 					if (!mounted || !result.rows) return;
 
 					// DEBUG: Log first few raw documents to see what's coming from Electric
-  					console.log("[useDocuments] Raw data sample:", result.rows.slice(0, 3));
-  
+					console.log("[useDocuments] Raw data sample:", result.rows.slice(0, 3));
 
 					const validItems = result.rows.filter(isValidDocument);
 					const isFullySynced = syncHandleRef.current?.isUpToDate ?? false;
@@ -309,8 +319,9 @@ export function useDocuments(
 
 					// Fetch user names for new users (non-blocking)
 					const unknownUserIds = validItems
-						.filter((doc): doc is DocumentElectric & { created_by_id: string } => 
-							doc.created_by_id !== null && !userCacheRef.current.has(doc.created_by_id)
+						.filter(
+							(doc): doc is DocumentElectric & { created_by_id: string } =>
+								doc.created_by_id !== null && !userCacheRef.current.has(doc.created_by_id)
 						)
 						.map((doc) => doc.created_by_id);
 
@@ -326,7 +337,7 @@ export function useDocuments(
 										prev.map((doc) => ({
 											...doc,
 											created_by_name: doc.created_by_id
-												? userCacheRef.current.get(doc.created_by_id) ?? null
+												? (userCacheRef.current.get(doc.created_by_id) ?? null)
 												: null,
 										}))
 									);
@@ -358,7 +369,9 @@ export function useDocuments(
 						// Case 2: Electric is fully synced - TRUST IT COMPLETELY (handles bulk deletes)
 						if (isFullySynced) {
 							const liveDocs = deduplicateAndSort(validItems.map(electricToDisplayDoc));
-							console.log(`[useDocuments] Synced update: ${liveDocs.length} docs (was ${prev.length})`);
+							console.log(
+								`[useDocuments] Synced update: ${liveDocs.length} docs (was ${prev.length})`
+							);
 							return liveDocs;
 						}
 
diff --git a/surfsense_web/lib/electric/client.ts b/surfsense_web/lib/electric/client.ts
index 3fa4586ac..9d596a261 100644
--- a/surfsense_web/lib/electric/client.ts
+++ b/surfsense_web/lib/electric/client.ts
@@ -444,9 +444,9 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 							// in use-inbox.ts generating different sync keys on each render.
 							// That's now fixed (rounded to midnight UTC in getSyncCutoffDate).
 							// We can safely use shapeKey for fast incremental sync.
-							
+
 							const shapeKey = `${userId}_v${SYNC_VERSION}_${table}_${where?.replace(/[^a-zA-Z0-9]/g, "_") || "all"}`;
-							
+
 							// Type assertion to PGlite with electric extension
 							const pgWithElectric = db as unknown as {
 								electric: {
@@ -495,9 +495,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 											// Parse the WHERE clause to build a DELETE statement
 											// The WHERE clause is already validated and formatted
 											await tx.exec(`DELETE FROM ${table} WHERE ${validatedWhere}`);
-											debugLog(
-												`[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`
-											);
+											debugLog(`[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`);
 										} else {
 											// No WHERE clause means we're syncing the entire table
 											await tx.exec(`DELETE FROM ${table}`);
@@ -514,10 +512,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 								},
 							};
 
-							debugLog(
-								"[Electric] syncShapeToTable config:",
-								JSON.stringify(shapeConfig, null, 2)
-							);
+							debugLog("[Electric] syncShapeToTable config:", JSON.stringify(shapeConfig, null, 2));
 
 							let shape: { unsubscribe: () => void; isUpToDate: boolean; stream: unknown };
 							try {
@@ -550,9 +545,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 											retryError instanceof Error ? retryError.message : String(retryError);
 										if (retryMessage.includes("Already syncing")) {
 											// Still syncing - create a placeholder handle that indicates the table is being synced
-											debugWarn(
-												`[Electric] ${table} still syncing, creating placeholder handle`
-											);
+											debugWarn(`[Electric] ${table} still syncing, creating placeholder handle`);
 											const placeholderHandle: SyncHandle = {
 												unsubscribe: () => {
 													debugLog(`[Electric] Placeholder unsubscribe for: ${cacheKey}`);
@@ -656,9 +649,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 
 										// Also check stream's isUpToDate property immediately
 										if (stream?.isUpToDate) {
-											debugLog(
-												`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`
-											);
+											debugLog(`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`);
 											resolveInitialSync();
 										}
 									}
@@ -671,9 +662,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
 										}
 
 										if (shape.isUpToDate || stream?.isUpToDate) {
-											debugLog(
-												`[Electric] ✅ Sync completed (detected via polling) for ${table}`
-											);
+											debugLog(`[Electric] ✅ Sync completed (detected via polling) for ${table}`);
 											clearInterval(pollInterval);
 											resolveInitialSync();
 										}

From 0f92b37b66ef27db4ea2c356cf62546693643cdd Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 05:36:18 +0530
Subject: [PATCH 34/36] feat: add status column to documents table for
 per-document processing tracking

---
 ..._status_column.py => 93_add_document_status_column.py} | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
 rename surfsense_backend/alembic/versions/{92_add_document_status_column.py => 93_add_document_status_column.py} (95%)

diff --git a/surfsense_backend/alembic/versions/92_add_document_status_column.py b/surfsense_backend/alembic/versions/93_add_document_status_column.py
similarity index 95%
rename from surfsense_backend/alembic/versions/92_add_document_status_column.py
rename to surfsense_backend/alembic/versions/93_add_document_status_column.py
index 8204096aa..382db6109 100644
--- a/surfsense_backend/alembic/versions/92_add_document_status_column.py
+++ b/surfsense_backend/alembic/versions/93_add_document_status_column.py
@@ -1,7 +1,7 @@
 """Add status column to documents table for per-document processing status
 
-Revision ID: 92
-Revises: 91
+Revision ID: 93
+Revises: 92
 Create Date: 2026-02-05
 
 Changes:
@@ -16,8 +16,8 @@ from collections.abc import Sequence
 from alembic import op
 
 # revision identifiers, used by Alembic.
-revision: str = "92"
-down_revision: str | None = "91"
+revision: str = "93"
+down_revision: str | None = "92"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 

From b41c22842f35d19308e5fcd512dc05c57a35d103 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 6 Feb 2026 12:32:55 +0530
Subject: [PATCH 35/36] refactor: change alembic migration number and make
 migrations idempotent

---
 ...ications_table_and_electric_replication.py | 36 +++++++------
 ...4_add_access_token_to_image_generations.py | 50 ++++++++++++++-----
 ...mn.py => 95_add_document_status_column.py} |  8 +--
 3 files changed, 63 insertions(+), 31 deletions(-)
 rename surfsense_backend/alembic/versions/{93_add_document_status_column.py => 95_add_document_status_column.py} (95%)

diff --git a/surfsense_backend/alembic/versions/66_add_notifications_table_and_electric_replication.py b/surfsense_backend/alembic/versions/66_add_notifications_table_and_electric_replication.py
index dc25a1edd..182bf981c 100644
--- a/surfsense_backend/alembic/versions/66_add_notifications_table_and_electric_replication.py
+++ b/surfsense_backend/alembic/versions/66_add_notifications_table_and_electric_replication.py
@@ -17,13 +17,6 @@ from collections.abc import Sequence
 
 from alembic import context, op
 
-# Get Electric SQL user credentials from env.py configuration
-_config = context.config
-ELECTRIC_DB_USER = _config.get_main_option("electric_db_user", "electric")
-ELECTRIC_DB_PASSWORD = _config.get_main_option(
-    "electric_db_password", "electric_password"
-)
-
 # revision identifiers, used by Alembic.
 revision: str = "66"
 down_revision: str | None = "65"
@@ -31,8 +24,21 @@ branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 
 
+def _get_electric_credentials() -> tuple[str, str]:
+    """Get Electric SQL credentials from Alembic config.
+
+    Must be called inside upgrade()/downgrade(), not at module level,
+    because context.config is only available during migration execution.
+    """
+    _config = context.config
+    user = _config.get_main_option("electric_db_user", "electric")
+    password = _config.get_main_option("electric_db_password", "electric_password")
+    return user, password
+
+
 def upgrade() -> None:
     """Upgrade schema - add notifications table and Electric SQL replication."""
+    electric_db_user, electric_db_password = _get_electric_credentials()
     # Create notifications table
     op.execute(
         """
@@ -74,8 +80,8 @@ def upgrade() -> None:
         f"""
         DO $$
         BEGIN
-            IF NOT EXISTS (SELECT FROM pg_user WHERE usename = '{ELECTRIC_DB_USER}') THEN
-                CREATE USER {ELECTRIC_DB_USER} WITH REPLICATION PASSWORD '{ELECTRIC_DB_PASSWORD}';
+            IF NOT EXISTS (SELECT FROM pg_user WHERE usename = '{electric_db_user}') THEN
+                CREATE USER {electric_db_user} WITH REPLICATION PASSWORD '{electric_db_password}';
             END IF;
         END
         $$;
@@ -89,19 +95,19 @@ def upgrade() -> None:
         DECLARE
             db_name TEXT := current_database();
         BEGIN
-            EXECUTE format('GRANT CONNECT ON DATABASE %I TO {ELECTRIC_DB_USER}', db_name);
+            EXECUTE format('GRANT CONNECT ON DATABASE %I TO {electric_db_user}', db_name);
         END
         $$;
         """
     )
-    op.execute(f"GRANT USAGE ON SCHEMA public TO {ELECTRIC_DB_USER};")
-    op.execute(f"GRANT SELECT ON ALL TABLES IN SCHEMA public TO {ELECTRIC_DB_USER};")
-    op.execute(f"GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO {ELECTRIC_DB_USER};")
+    op.execute(f"GRANT USAGE ON SCHEMA public TO {electric_db_user};")
+    op.execute(f"GRANT SELECT ON ALL TABLES IN SCHEMA public TO {electric_db_user};")
+    op.execute(f"GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO {electric_db_user};")
     op.execute(
-        f"ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO {ELECTRIC_DB_USER};"
+        f"ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO {electric_db_user};"
     )
     op.execute(
-        f"ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO {ELECTRIC_DB_USER};"
+        f"ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO {electric_db_user};"
     )
 
     # Create the publication if not exists
diff --git a/surfsense_backend/alembic/versions/94_add_access_token_to_image_generations.py b/surfsense_backend/alembic/versions/94_add_access_token_to_image_generations.py
index 09bea2c19..92f027e00 100644
--- a/surfsense_backend/alembic/versions/94_add_access_token_to_image_generations.py
+++ b/surfsense_backend/alembic/versions/94_add_access_token_to_image_generations.py
@@ -10,8 +10,6 @@ SECRET_KEY rotation.
 
 from collections.abc import Sequence
 
-import sqlalchemy as sa
-
 from alembic import op
 
 # revision identifiers, used by Alembic.
@@ -23,17 +21,45 @@ depends_on: str | Sequence[str] | None = None
 
 def upgrade() -> None:
     # Add access_token column (nullable so existing rows are unaffected)
-    op.add_column(
-        "image_generations",
-        sa.Column("access_token", sa.String(64), nullable=True),
-    )
-    op.create_index(
-        "ix_image_generations_access_token",
-        "image_generations",
-        ["access_token"],
+    # Guard: skip entirely if image_generations table doesn't exist
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF EXISTS (
+                SELECT 1 FROM information_schema.tables
+                WHERE table_name = 'image_generations'
+            ) THEN
+                -- Add column if not exists
+                IF NOT EXISTS (
+                    SELECT 1 FROM information_schema.columns
+                    WHERE table_name = 'image_generations' AND column_name = 'access_token'
+                ) THEN
+                    ALTER TABLE image_generations
+                    ADD COLUMN access_token VARCHAR(64);
+                END IF;
+
+                -- Create index if not exists
+                CREATE INDEX IF NOT EXISTS ix_image_generations_access_token
+                ON image_generations (access_token);
+            END IF;
+        END$$;
+        """
     )
 
 
 def downgrade() -> None:
-    op.drop_index("ix_image_generations_access_token", table_name="image_generations")
-    op.drop_column("image_generations", "access_token")
+    op.execute("DROP INDEX IF EXISTS ix_image_generations_access_token")
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF EXISTS (
+                SELECT 1 FROM information_schema.columns
+                WHERE table_name = 'image_generations' AND column_name = 'access_token'
+            ) THEN
+                ALTER TABLE image_generations DROP COLUMN access_token;
+            END IF;
+        END$$;
+        """
+    )
diff --git a/surfsense_backend/alembic/versions/93_add_document_status_column.py b/surfsense_backend/alembic/versions/95_add_document_status_column.py
similarity index 95%
rename from surfsense_backend/alembic/versions/93_add_document_status_column.py
rename to surfsense_backend/alembic/versions/95_add_document_status_column.py
index 382db6109..f5a6fa65d 100644
--- a/surfsense_backend/alembic/versions/93_add_document_status_column.py
+++ b/surfsense_backend/alembic/versions/95_add_document_status_column.py
@@ -1,7 +1,7 @@
 """Add status column to documents table for per-document processing status
 
-Revision ID: 93
-Revises: 92
+Revision ID: 95
+Revises: 94
 Create Date: 2026-02-05
 
 Changes:
@@ -16,8 +16,8 @@ from collections.abc import Sequence
 from alembic import op
 
 # revision identifiers, used by Alembic.
-revision: str = "93"
-down_revision: str | None = "92"
+revision: str = "95"
+down_revision: str | None = "94"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 

From 017c2628425970c9c9beaebc88f6ccceb79be4ad Mon Sep 17 00:00:00 2001
From: "DESKTOP-RTLN3BA\\$punk" <vermarohanfinal@gmail.com>
Date: Thu, 5 Feb 2026 23:45:01 -0800
Subject: [PATCH 36/36] fix: update DocumentsFilters component for
 accessibility and add success message for document deletion

---
 .../(manage)/components/DocumentsFilters.tsx        | 13 ++++++++++---
 surfsense_web/messages/en.json                      |  1 +
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
index 6bd5f8460..ebdf431e4 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx
@@ -193,11 +193,18 @@ export function DocumentsFilters({
 										</div>
 									) : (
 										filteredTypes.map((value: DocumentTypeEnum, i) => (
-											<button
+											<div
 												key={value}
-												type="button"
+												role="button"
+												tabIndex={0}
 												className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
 												onClick={() => onToggleType(value, !activeTypes.includes(value))}
+												onKeyDown={(e) => {
+													if (e.key === "Enter" || e.key === " ") {
+														e.preventDefault();
+														onToggleType(value, !activeTypes.includes(value));
+													}
+												}}
 											>
 												{/* Icon */}
 												<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80">
@@ -220,7 +227,7 @@ export function DocumentsFilters({
 													onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
 													className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
 												/>
-											</button>
+											</div>
 										))
 									)}
 								</div>
diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json
index 68ea533ac..fae4c7265 100644
--- a/surfsense_web/messages/en.json
+++ b/surfsense_web/messages/en.json
@@ -308,6 +308,7 @@
 		"no_rows_selected": "No rows selected",
 		"delete_success_count": "Successfully deleted {count} document(s)",
 		"delete_partial_failed": "Some documents could not be deleted",
+		"delete_success": "Document deleted successfully",
 		"delete_error": "Error deleting documents",
 		"filter_by_title": "Filter by title...",
 		"bulk_delete": "Delete Selected",