From 9d6d81871219906432e8b35260a8f5c3de6edbc6 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 1 Apr 2026 18:52:04 +0530 Subject: [PATCH 01/61] fix: add select-none class to TabBar --- surfsense_web/components/layout/ui/tabs/TabBar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_web/components/layout/ui/tabs/TabBar.tsx b/surfsense_web/components/layout/ui/tabs/TabBar.tsx index 18e1ba141..8d0d986d3 100644 --- a/surfsense_web/components/layout/ui/tabs/TabBar.tsx +++ b/surfsense_web/components/layout/ui/tabs/TabBar.tsx @@ -72,7 +72,7 @@ export function TabBar({ onTabSwitch, onNewChat, rightActions, className }: TabB if (tabs.length <= 1) return null; return ( -
+
Date: Wed, 1 Apr 2026 20:31:45 +0530 Subject: [PATCH 02/61] fix: improve document loading error handling and UI feedback for processing state --- surfsense_backend/app/routes/editor_routes.py | 11 ++++-- .../components/DocumentsTableShell.tsx | 5 +++ .../components/documents/DocumentNode.tsx | 4 +-- .../components/documents/FolderTreeView.tsx | 7 ++-- .../components/editor-panel/editor-panel.tsx | 22 +++++++++--- .../layout/ui/tabs/DocumentTabContent.tsx | 34 +++++++++++++++---- .../new-chat/source-detail-panel.tsx | 10 +++--- 7 files changed, 69 insertions(+), 24 deletions(-) diff --git a/surfsense_backend/app/routes/editor_routes.py b/surfsense_backend/app/routes/editor_routes.py index f54f18def..0fcbc475d 100644 --- a/surfsense_backend/app/routes/editor_routes.py +++ b/surfsense_backend/app/routes/editor_routes.py @@ -127,9 +127,16 @@ async def get_editor_content( chunks = sorted(document.chunks, key=lambda c: c.id) if not chunks: + doc_status = document.status or {} + state = doc_status.get("state", "ready") if isinstance(doc_status, dict) else "ready" + if state in ("pending", "processing"): + raise HTTPException( + status_code=409, + detail="This document is still being processed. Please wait a moment and try again.", + ) raise HTTPException( status_code=400, - detail="This document has no content and cannot be edited. Please re-upload to enable editing.", + detail="This document has no viewable content yet. It may still be syncing. Try again in a few seconds, or re-upload if the issue persists.", ) markdown_content = "\n\n".join(chunk.content for chunk in chunks) @@ -137,7 +144,7 @@ async def get_editor_content( if not markdown_content.strip(): raise HTTPException( status_code=400, - detail="This document has empty content and cannot be edited.", + detail="This document appears to be empty. Try re-uploading or editing it to add content.", ) # Persist the lazy migration diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx index 92ced6e47..0758307f7 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx @@ -748,6 +748,7 @@ export function DocumentsTableShell({ onClick={() => onOpenInTab ? onOpenInTab(doc) : handleViewDocument(doc) } + disabled={isBeingProcessed} > Open @@ -1020,6 +1021,10 @@ export function DocumentsTableShell({ e.stopPropagation()}> - onPreview(doc)}> + onPreview(doc)} disabled={isProcessing}> Open @@ -259,7 +259,7 @@ export const DocumentNode = React.memo(function DocumentNode({ {contextMenuOpen && ( e.stopPropagation()}> - onPreview(doc)}> + onPreview(doc)} disabled={isProcessing}> Open diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index f63d5da5c..7695923e3 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -1,7 +1,7 @@ "use client"; import { useAtom } from "jotai"; -import { CirclePlus } from "lucide-react"; +import { Search } from "lucide-react"; import { useCallback, useMemo, useState } from "react"; import { DndProvider } from "react-dnd"; import { HTML5Backend } from "react-dnd-html5-backend"; @@ -250,8 +250,9 @@ export function FolderTreeView({ if (treeNodes.length === 0 && (activeTypes.length > 0 || searchQuery)) { return (
- -

No matching documents

+ +

No matching documents

+

Try a different search term

); } diff --git a/surfsense_web/components/editor-panel/editor-panel.tsx b/surfsense_web/components/editor-panel/editor-panel.tsx index 3ea36f800..7496e6aec 100644 --- a/surfsense_web/components/editor-panel/editor-panel.tsx +++ b/surfsense_web/components/editor-panel/editor-panel.tsx @@ -1,7 +1,7 @@ "use client"; import { useAtomValue, useSetAtom } from "jotai"; -import { AlertCircle, XIcon } from "lucide-react"; +import { FileQuestionMark, RefreshCw, XIcon } from "lucide-react"; import dynamic from "next/dynamic"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; @@ -200,10 +200,22 @@ export function EditorPanelContent({ ) : error || !editorDoc ? (
- -
-

Failed to load document

-

{error || "An unknown error occurred"}

+ {error?.toLowerCase().includes("still being processed") ? ( +
+ +
+ ) : ( +
+ +
+ )} +
+

+ {error?.toLowerCase().includes("still being processed") + ? "Document is processing" + : "Document unavailable"} +

+

{error || "An unknown error occurred"}

) : isEditableType ? ( diff --git a/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx b/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx index ac279cd4d..849bdbea5 100644 --- a/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx +++ b/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx @@ -1,6 +1,6 @@ "use client"; -import { AlertCircle, Pencil } from "lucide-react"; +import { FileQuestionMark, PenLine, RefreshCw } from "lucide-react"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { PlateEditor } from "@/components/editor/plate-editor"; @@ -160,15 +160,35 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen if (isLoading) return ; if (error || !doc) { + const isProcessing = error?.toLowerCase().includes("still being processed"); return ( -
- -
-

Failed to load document

-

+

+
+ {isProcessing ? ( + + ) : ( + + )} +
+
+

+ {isProcessing ? "Document is processing" : "Document unavailable"} +

+

{error || "An unknown error occurred"}

+ {!isProcessing && ( + + )}
); } @@ -229,7 +249,7 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen onClick={() => setIsEditing(true)} className="gap-1.5" > - + Edit )} diff --git a/surfsense_web/components/new-chat/source-detail-panel.tsx b/surfsense_web/components/new-chat/source-detail-panel.tsx index b02b2e217..9c1167efe 100644 --- a/surfsense_web/components/new-chat/source-detail-panel.tsx +++ b/surfsense_web/components/new-chat/source-detail-panel.tsx @@ -1,7 +1,7 @@ "use client"; import { useQuery } from "@tanstack/react-query"; -import { BookOpen, ChevronDown, ExternalLink, FileText, Hash, Sparkles, X } from "lucide-react"; +import { BookOpen, ChevronDown, ExternalLink, FileQuestionMark, FileText, Hash, Sparkles, X } from "lucide-react"; import { AnimatePresence, motion, useReducedMotion } from "motion/react"; import { useTranslations } from "next-intl"; import type React from "react"; @@ -392,12 +392,12 @@ export function SourceDetailPanel({ animate={{ opacity: 1, scale: 1 }} className="flex flex-col items-center gap-4 text-center px-6" > -
- +
+
-

- Failed to load document +

+ Document unavailable

{documentByChunkFetchingError.message || From 5c11a15fb6534c55e3e50e49caac4e4ee3703cec Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 1 Apr 2026 20:51:55 +0530 Subject: [PATCH 03/61] refactor: update UI components by removing unused imports and adjusting loading states --- .../components/CommunityPromptsContent.tsx | 2 +- .../components/PromptsContent.tsx | 6 ++-- .../components/editor-panel/editor-panel.tsx | 29 +++++++++---------- .../layout/ui/right-panel/RightPanel.tsx | 2 +- .../settings/image-model-manager.tsx | 3 +- .../settings/model-config-manager.tsx | 2 -- 6 files changed, 20 insertions(+), 24 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/CommunityPromptsContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/CommunityPromptsContent.tsx index 4bcdcba7e..239832b2d 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/CommunityPromptsContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/CommunityPromptsContent.tsx @@ -60,7 +60,7 @@ export function CommunityPromptsContent() { {list.length === 0 && (

- +

No community prompts yet

Share your own prompts from the My Prompts tab diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/PromptsContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/PromptsContent.tsx index 522d71e59..39362d244 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/PromptsContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/PromptsContent.tsx @@ -1,7 +1,8 @@ "use client"; import { useAtomValue } from "jotai"; -import { AlertTriangle, Globe, Lock, PenLine, Plus, Sparkles, Trash2 } from "lucide-react"; +import { AlertTriangle, Globe, Lock, PenLine, Sparkles, Trash2 } from "lucide-react"; +import { ShortcutKbd } from "@/components/ui/shortcut-kbd"; import { useCallback, useState } from "react"; import { toast } from "sonner"; import { @@ -145,7 +146,7 @@ export function PromptsContent() {

Create prompt templates triggered with{" "} - / in the + in the chat composer.

{!showForm && ( @@ -158,7 +159,6 @@ export function PromptsContent() { }} className="shrink-0 gap-1.5" > - New )} diff --git a/surfsense_web/components/editor-panel/editor-panel.tsx b/surfsense_web/components/editor-panel/editor-panel.tsx index 7496e6aec..802a5ffc3 100644 --- a/surfsense_web/components/editor-panel/editor-panel.tsx +++ b/surfsense_web/components/editor-panel/editor-panel.tsx @@ -9,24 +9,9 @@ import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-pan import { MarkdownViewer } from "@/components/markdown-viewer"; import { Button } from "@/components/ui/button"; import { Drawer, DrawerContent, DrawerHandle, DrawerTitle } from "@/components/ui/drawer"; -import { Skeleton } from "@/components/ui/skeleton"; import { useMediaQuery } from "@/hooks/use-media-query"; import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils"; -const PlateEditor = dynamic( - () => import("@/components/editor/plate-editor").then((m) => ({ default: m.PlateEditor })), - { ssr: false, loading: () => } -); - -interface EditorContent { - document_id: number; - title: string; - document_type?: string; - source_markdown: string; -} - -const EDITABLE_DOCUMENT_TYPES = new Set(["FILE", "NOTE"]); - function EditorPanelSkeleton() { return (
@@ -47,6 +32,20 @@ function EditorPanelSkeleton() { ); } +const PlateEditor = dynamic( + () => import("@/components/editor/plate-editor").then((m) => ({ default: m.PlateEditor })), + { ssr: false, loading: () => null } +); + +interface EditorContent { + document_id: number; + title: string; + document_type?: string; + source_markdown: string; +} + +const EDITABLE_DOCUMENT_TYPES = new Set(["FILE", "NOTE"]); + export function EditorPanelContent({ documentId, searchSpaceId, diff --git a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx index ac2f65065..717f5a459 100644 --- a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx +++ b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx @@ -19,7 +19,7 @@ const EditorPanelContent = dynamic( import("@/components/editor-panel/editor-panel").then((m) => ({ default: m.EditorPanelContent, })), - { ssr: false, loading: () => } + { ssr: false, loading: () => null } ); const HitlEditPanelContent = dynamic( diff --git a/surfsense_web/components/settings/image-model-manager.tsx b/surfsense_web/components/settings/image-model-manager.tsx index 8f08b7db3..0c45af7d4 100644 --- a/surfsense_web/components/settings/image-model-manager.tsx +++ b/surfsense_web/components/settings/image-model-manager.tsx @@ -1,7 +1,7 @@ "use client"; import { useAtomValue } from "jotai"; -import { AlertCircle, Edit3, Info, Plus, RefreshCw, Trash2, Wand2 } from "lucide-react"; +import { AlertCircle, Edit3, Info, RefreshCw, Trash2, Wand2 } from "lucide-react"; import { useMemo, useState } from "react"; import { deleteImageGenConfigMutationAtom } from "@/atoms/image-gen-config/image-gen-config-mutation.atoms"; import { @@ -257,7 +257,6 @@ export function ImageModelManager({ searchSpaceId }: ImageModelManagerProps) { size="lg" className="gap-2 text-xs md:text-sm h-9 md:h-10" > - Add First Image Model )} diff --git a/surfsense_web/components/settings/model-config-manager.tsx b/surfsense_web/components/settings/model-config-manager.tsx index 046288a96..50d2ab5b7 100644 --- a/surfsense_web/components/settings/model-config-manager.tsx +++ b/surfsense_web/components/settings/model-config-manager.tsx @@ -7,7 +7,6 @@ import { FileText, Info, MessageSquareQuote, - Plus, RefreshCw, Trash2, Wand2, @@ -270,7 +269,6 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { size="lg" className="gap-2 text-xs md:text-sm h-9 md:h-10" > - Create First Configuration )} From 33e7aeef9d275a49ad0b7e74a46244b42d4ef547 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 1 Apr 2026 21:00:52 +0530 Subject: [PATCH 04/61] style: enhance input field focus styles and transition effects in login and registration forms --- .../app/(home)/login/LocalLoginForm.tsx | 20 +-- surfsense_web/app/(home)/register/page.tsx | 142 +++++++++--------- 2 files changed, 81 insertions(+), 81 deletions(-) diff --git a/surfsense_web/app/(home)/login/LocalLoginForm.tsx b/surfsense_web/app/(home)/login/LocalLoginForm.tsx index ee3b47683..1ebbf46b6 100644 --- a/surfsense_web/app/(home)/login/LocalLoginForm.tsx +++ b/surfsense_web/app/(home)/login/LocalLoginForm.tsx @@ -160,11 +160,11 @@ export function LocalLoginForm() { placeholder="you@example.com" value={username} onChange={(e) => setUsername(e.target.value)} - className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-2 focus:ring-offset-2 bg-background text-foreground transition-all ${ - error.title - ? "border-destructive focus:border-destructive focus:ring-destructive" - : "border-border focus:border-primary focus:ring-primary" - }`} + className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} disabled={isLoggingIn} />
@@ -181,11 +181,11 @@ export function LocalLoginForm() { placeholder="Enter your password" value={password} onChange={(e) => setPassword(e.target.value)} - className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-2 focus:ring-offset-2 bg-background text-foreground transition-all ${ - error.title - ? "border-destructive focus:border-destructive focus:ring-destructive" - : "border-border focus:border-primary focus:ring-primary" - }`} + className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} disabled={isLoggingIn} />
-
- - setPassword(e.target.value)} - className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-2 focus:ring-offset-2 dark:bg-gray-800 dark:text-white transition-all ${ - error.title - ? "border-red-300 focus:border-red-500 focus:ring-red-500 dark:border-red-700" - : "border-gray-300 focus:border-blue-500 focus:ring-blue-500 dark:border-gray-700" - }`} - disabled={isRegistering} - /> -
+
+ + setPassword(e.target.value)} + className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} + disabled={isRegistering} + /> +
-
- - setConfirmPassword(e.target.value)} - className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-2 focus:ring-offset-2 dark:bg-gray-800 dark:text-white transition-all ${ - error.title - ? "border-red-300 focus:border-red-500 focus:ring-red-500 dark:border-red-700" - : "border-gray-300 focus:border-blue-500 focus:ring-blue-500 dark:border-gray-700" - }`} - disabled={isRegistering} - /> +
+ + setConfirmPassword(e.target.value)} + className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} + disabled={isRegistering} + />
); } diff --git a/surfsense_web/components/settings/image-model-manager.tsx b/surfsense_web/components/settings/image-model-manager.tsx index 0c45af7d4..55128dc59 100644 --- a/surfsense_web/components/settings/image-model-manager.tsx +++ b/surfsense_web/components/settings/image-model-manager.tsx @@ -240,26 +240,14 @@ export function ImageModelManager({ searchSpaceId }: ImageModelManagerProps) { {!isLoading && (
{(userConfigs?.length ?? 0) === 0 ? ( - + -
- -
-

No Image Models Yet

-

+

No Image Models Yet

+

{canCreate ? "Add your own image generation model (DALL-E 3, GPT Image 1, etc.)" : "No image models have been added to this space yet. Contact a space owner to add one."}

- {canCreate && ( - - )}
) : ( diff --git a/surfsense_web/components/settings/model-config-manager.tsx b/surfsense_web/components/settings/model-config-manager.tsx index 50d2ab5b7..837060c70 100644 --- a/surfsense_web/components/settings/model-config-manager.tsx +++ b/surfsense_web/components/settings/model-config-manager.tsx @@ -250,28 +250,14 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
{configs?.length === 0 ? (
- + -
- -
-
-

No Configurations Yet

-

- {canCreate - ? "Create your first AI configuration to customize how your agent responds" - : "No AI configurations have been added to this space yet. Contact a space owner to add one."} -

-
- {canCreate && ( - - )} +

No LLM Models Yet

+

+ {canCreate + ? "Add your first LLM model to power document summarization, chat, and other agent capabilities" + : "No LLM models have been added to this space yet. Contact a space owner to add one"} +

From 407175ffae46a23d775bcba9c35fdfb465766ba0 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 09:26:16 +0530 Subject: [PATCH 08/61] style: replace loading indicators with Spinner component in alert dialogs --- .../layout/providers/LayoutDataProvider.tsx | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx index fd6b45c52..6138b67fb 100644 --- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx +++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx @@ -775,7 +775,8 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid {t("delete_chat")} - {t("delete_chat_confirm")} {chatToDelete?.name}?{" "} + {t("delete_chat_confirm")}{" "} + {chatToDelete?.name}?{" "} {t("action_cannot_undone")} @@ -835,9 +836,7 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid {tSidebar("rename") || "Rename"} - {isRenamingChat && ( - - )} + {isRenamingChat && } @@ -865,9 +864,7 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid className="relative bg-destructive text-destructive-foreground hover:bg-destructive/90" > {tCommon("delete")} - {isDeletingSearchSpace && ( - - )} + {isDeletingSearchSpace && } @@ -895,9 +892,7 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid className="relative bg-destructive text-destructive-foreground hover:bg-destructive/90" > {t("leave")} - {isLeavingSearchSpace && ( - - )} + {isLeavingSearchSpace && } From d2cf3fb3b7eafce01c881e80049346239c84bc6f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 09:58:14 +0530 Subject: [PATCH 09/61] refactor: update LLM role management logic and enhance UI feedback --- .../new-llm-config-mutation.atoms.ts | 9 +- .../components/settings/llm-role-manager.tsx | 165 +++++------------- 2 files changed, 52 insertions(+), 122 deletions(-) diff --git a/surfsense_web/atoms/new-llm-config/new-llm-config-mutation.atoms.ts b/surfsense_web/atoms/new-llm-config/new-llm-config-mutation.atoms.ts index 861606f80..d6d3aa820 100644 --- a/surfsense_web/atoms/new-llm-config/new-llm-config-mutation.atoms.ts +++ b/surfsense_web/atoms/new-llm-config/new-llm-config-mutation.atoms.ts @@ -109,10 +109,11 @@ export const updateLLMPreferencesMutationAtom = atomWithMutation((get) => { mutationFn: async (request: UpdateLLMPreferencesRequest) => { return newLLMConfigApiService.updateLLMPreferences(request); }, - onSuccess: () => { - queryClient.invalidateQueries({ - queryKey: cacheKeys.newLLMConfigs.preferences(Number(searchSpaceId)), - }); + onSuccess: (_data, request: UpdateLLMPreferencesRequest) => { + queryClient.setQueryData( + cacheKeys.newLLMConfigs.preferences(Number(searchSpaceId)), + (old: Record | undefined) => ({ ...old, ...request.data }) + ); }, onError: (error: Error) => { toast.error(error.message || "Failed to update LLM preferences"); diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx index 07ec492a3..22e17e431 100644 --- a/surfsense_web/components/settings/llm-role-manager.tsx +++ b/surfsense_web/components/settings/llm-role-manager.tsx @@ -4,16 +4,14 @@ import { useAtomValue } from "jotai"; import { AlertCircle, Bot, - CheckCircle, + CircleCheck, CircleDashed, FileText, ImageIcon, RefreshCw, - RotateCcw, - Save, Shuffle, } from "lucide-react"; -import { useEffect, useState } from "react"; +import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { globalImageGenConfigsAtom, @@ -40,6 +38,7 @@ import { SelectValue, } from "@/components/ui/select"; import { Skeleton } from "@/components/ui/skeleton"; +import { Spinner } from "@/components/ui/spinner"; import { getProviderIcon } from "@/lib/provider-icons"; import { cn } from "@/lib/utils"; @@ -48,8 +47,8 @@ const ROLE_DESCRIPTIONS = { icon: Bot, title: "Agent LLM", description: "Primary LLM for chat interactions and agent operations", - color: "text-blue-600 dark:text-blue-400", - bgColor: "bg-blue-500/10", + color: "text-muted-foreground", + bgColor: "bg-muted", prefKey: "agent_llm_id" as const, configType: "llm" as const, }, @@ -57,8 +56,8 @@ const ROLE_DESCRIPTIONS = { icon: FileText, title: "Document Summary LLM", description: "Handles document summarization and research synthesis", - color: "text-purple-600 dark:text-purple-400", - bgColor: "bg-purple-500/10", + color: "text-muted-foreground", + bgColor: "bg-muted", prefKey: "document_summary_llm_id" as const, configType: "llm" as const, }, @@ -66,8 +65,8 @@ const ROLE_DESCRIPTIONS = { icon: ImageIcon, title: "Image Generation Model", description: "Model used for AI image generation (DALL-E, GPT Image, etc.)", - color: "text-teal-600 dark:text-teal-400", - bgColor: "bg-teal-500/10", + color: "text-muted-foreground", + bgColor: "bg-muted", prefKey: "image_generation_config_id" as const, configType: "image" as const, }, @@ -118,88 +117,41 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { image_generation_config_id: preferences.image_generation_config_id ?? "", })); - const [hasChanges, setHasChanges] = useState(false); - const [isSaving, setIsSaving] = useState(false); + const [savingRole, setSavingRole] = useState(null); + const savingRef = useRef(false); useEffect(() => { - const newAssignments = { - agent_llm_id: preferences.agent_llm_id ?? "", - document_summary_llm_id: preferences.document_summary_llm_id ?? "", - image_generation_config_id: preferences.image_generation_config_id ?? "", - }; - setAssignments(newAssignments); - setHasChanges(false); + if (!savingRef.current) { + setAssignments({ + agent_llm_id: preferences.agent_llm_id ?? "", + document_summary_llm_id: preferences.document_summary_llm_id ?? "", + image_generation_config_id: preferences.image_generation_config_id ?? "", + }); + } }, [ preferences?.agent_llm_id, preferences?.document_summary_llm_id, preferences?.image_generation_config_id, ]); - const handleRoleAssignment = (prefKey: string, configId: string) => { - const newAssignments = { - ...assignments, - [prefKey]: configId === "unassigned" ? "" : parseInt(configId), - }; + const handleRoleAssignment = useCallback(async (prefKey: string, configId: string) => { + const value = configId === "unassigned" ? "" : parseInt(configId); - setAssignments(newAssignments); + setAssignments((prev) => ({ ...prev, [prefKey]: value })); + setSavingRole(prefKey); + savingRef.current = true; - const currentPrefs = { - agent_llm_id: preferences.agent_llm_id ?? "", - document_summary_llm_id: preferences.document_summary_llm_id ?? "", - image_generation_config_id: preferences.image_generation_config_id ?? "", - }; - - const hasChangesNow = Object.keys(newAssignments).some( - (key) => - newAssignments[key as keyof typeof newAssignments] !== - currentPrefs[key as keyof typeof currentPrefs] - ); - - setHasChanges(hasChangesNow); - }; - - const handleSave = async () => { - setIsSaving(true); - - const toNumericOrUndefined = (val: string | number) => - typeof val === "string" ? (val ? parseInt(val) : undefined) : val; - - const numericAssignments = { - agent_llm_id: toNumericOrUndefined(assignments.agent_llm_id), - document_summary_llm_id: toNumericOrUndefined(assignments.document_summary_llm_id), - image_generation_config_id: toNumericOrUndefined(assignments.image_generation_config_id), - }; - - await updatePreferences({ - search_space_id: searchSpaceId, - data: numericAssignments, - }); - - setHasChanges(false); - toast.success("Role assignments saved successfully!"); - - setIsSaving(false); - }; - - const handleReset = () => { - setAssignments({ - agent_llm_id: preferences.agent_llm_id ?? "", - document_summary_llm_id: preferences.document_summary_llm_id ?? "", - image_generation_config_id: preferences.image_generation_config_id ?? "", - }); - setHasChanges(false); - }; - - const isAssignmentComplete = - assignments.agent_llm_id !== "" && - assignments.agent_llm_id !== null && - assignments.agent_llm_id !== undefined && - assignments.document_summary_llm_id !== "" && - assignments.document_summary_llm_id !== null && - assignments.document_summary_llm_id !== undefined && - assignments.image_generation_config_id !== "" && - assignments.image_generation_config_id !== null && - assignments.image_generation_config_id !== undefined; + try { + await updatePreferences({ + search_space_id: searchSpaceId, + data: { [prefKey]: value || undefined }, + }); + toast.success("Role assignment updated"); + } finally { + setSavingRole(null); + savingRef.current = false; + } + }, [updatePreferences, searchSpaceId]); // Combine global and custom LLM configs const allLLMConfigs = [ @@ -213,6 +165,11 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { ...(userImageConfigs ?? []).filter((config) => config.id && config.id.toString().trim() !== ""), ]; + const isAssignmentComplete = + allLLMConfigs.some((c) => c.id === assignments.agent_llm_id) && + allLLMConfigs.some((c) => c.id === assignments.document_summary_llm_id) && + allImageConfigs.some((c) => c.id === assignments.image_generation_config_id); + const isLoading = configsLoading || preferencesLoading || @@ -244,9 +201,9 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { {isAssignmentComplete && !isLoading && !hasError && ( - + All roles assigned )} @@ -332,10 +289,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { const roleAllConfigs = isImageRole ? allImageConfigs : allLLMConfigs; const assignedConfig = roleAllConfigs.find((config) => config.id === currentAssignment); - const isAssigned = - currentAssignment !== "" && - currentAssignment !== null && - currentAssignment !== undefined; + const isAssigned = !!assignedConfig; const isAutoMode = assignedConfig && "is_auto_mode" in assignedConfig && assignedConfig.is_auto_mode; @@ -361,8 +315,10 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {

- {isAssigned ? ( - + {savingRole === role.prefKey ? ( + + ) : isAssigned ? ( + ) : ( )} @@ -374,7 +330,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { Configuration + + + + )} + /> + + ( + + Folder Path +
+ + + + {isElectron && ( + + )} +
+ + The absolute path to the folder to watch and sync. + + +
+ )} + /> + + ( + + Display Name + + + + + A friendly name shown in the documents sidebar. + + + + )} + /> + + ( + + Exclude Patterns + + + + + Comma-separated patterns of directories/files to exclude. + + + + )} + /> + + ( + + File Extensions (optional) + + + + + Leave empty to index all supported files, or specify comma-separated extensions. + + + + )} + /> + + + +
+ + {getConnectorBenefits(EnumConnectorName.LOCAL_FOLDER_CONNECTOR) && ( +
+

+ What you get with Local Folder sync: +

+
    + {getConnectorBenefits(EnumConnectorName.LOCAL_FOLDER_CONNECTOR)?.map( + (benefit) =>
  • {benefit}
  • + )} +
+
+ )} +
+ ); +}; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts index 0dc093100..40c6a7fdd 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts @@ -111,6 +111,14 @@ export function getConnectorBenefits(connectorType: string): string[] | null { "Incremental sync - only changed files are re-indexed", "Full support for your vault's folder structure", ], + LOCAL_FOLDER_CONNECTOR: [ + "Watch local folders for real-time changes via the desktop app", + "Automatic change detection — only modified files are re-indexed", + "Version history with up to 20 snapshots per document", + "Mirrors your folder structure in the SurfSense sidebar", + "Supports any text-based file format", + "Works as a periodic sync fallback when the desktop app is not running", + ], }; return benefits[connectorType] || null; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx index 37d4ad5d8..116893399 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx @@ -7,6 +7,7 @@ import { GithubConnectForm } from "./components/github-connect-form"; import { LinkupApiConnectForm } from "./components/linkup-api-connect-form"; import { LumaConnectForm } from "./components/luma-connect-form"; import { MCPConnectForm } from "./components/mcp-connect-form"; +import { LocalFolderConnectForm } from "./components/local-folder-connect-form"; import { ObsidianConnectForm } from "./components/obsidian-connect-form"; import { TavilyApiConnectForm } from "./components/tavily-api-connect-form"; @@ -58,7 +59,8 @@ export function getConnectFormComponent(connectorType: string): ConnectFormCompo return MCPConnectForm; case "OBSIDIAN_CONNECTOR": return ObsidianConnectForm; - // Add other connector types here as needed + case "LOCAL_FOLDER_CONNECTOR": + return LocalFolderConnectForm; default: return null; } diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/local-folder-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/local-folder-config.tsx new file mode 100644 index 000000000..cb4295079 --- /dev/null +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/local-folder-config.tsx @@ -0,0 +1,163 @@ +"use client"; + +import type { FC } from "react"; +import { useState } from "react"; +import { FolderSync } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import type { ConnectorConfigProps } from "../index"; + +export const LocalFolderConfig: FC = ({ + connector, + onConfigChange, + onNameChange, +}) => { + const isElectron = typeof window !== "undefined" && !!window.electronAPI; + + const [folderPath, setFolderPath] = useState( + (connector.config?.folder_path as string) || "" + ); + const [folderName, setFolderName] = useState( + (connector.config?.folder_name as string) || "" + ); + const [excludePatterns, setExcludePatterns] = useState(() => { + const patterns = connector.config?.exclude_patterns; + if (Array.isArray(patterns)) { + return patterns.join(", "); + } + return (patterns as string) || "node_modules, .git, .DS_Store"; + }); + const [fileExtensions, setFileExtensions] = useState(() => { + const exts = connector.config?.file_extensions; + if (Array.isArray(exts)) { + return exts.join(", "); + } + return (exts as string) || ""; + }); + const [name, setName] = useState(connector.name || ""); + + const handleFolderPathChange = (value: string) => { + setFolderPath(value); + onConfigChange?.({ ...connector.config, folder_path: value }); + }; + + const handleFolderNameChange = (value: string) => { + setFolderName(value); + onConfigChange?.({ ...connector.config, folder_name: value }); + }; + + const handleExcludePatternsChange = (value: string) => { + setExcludePatterns(value); + const arr = value + .split(",") + .map((p) => p.trim()) + .filter(Boolean); + onConfigChange?.({ ...connector.config, exclude_patterns: arr }); + }; + + const handleFileExtensionsChange = (value: string) => { + setFileExtensions(value); + const arr = value + ? value + .split(",") + .map((e) => { + const ext = e.trim(); + return ext.startsWith(".") ? ext : `.${ext}`; + }) + .filter(Boolean) + : null; + onConfigChange?.({ ...connector.config, file_extensions: arr }); + }; + + const handleNameChange = (value: string) => { + setName(value); + onNameChange?.(value); + }; + + const handleBrowse = async () => { + if (!isElectron) return; + const selected = await window.electronAPI!.selectFolder(); + if (selected) { + handleFolderPathChange(selected); + const autoName = selected.split(/[\\/]/).pop() || "folder"; + if (!folderName) handleFolderNameChange(autoName); + } + }; + + return ( +
+
+
+ + handleNameChange(e.target.value)} + placeholder="Local Folder" + className="border-slate-400/20 focus-visible:border-slate-400/40" + /> +
+
+ +
+

Folder Configuration

+ +
+
+ +
+ handleFolderPathChange(e.target.value)} + placeholder="/path/to/your/folder" + className="border-slate-400/20 focus-visible:border-slate-400/40 font-mono flex-1" + /> + {isElectron && ( + + )} +
+
+ +
+ + handleFolderNameChange(e.target.value)} + placeholder="My Notes" + className="border-slate-400/20 focus-visible:border-slate-400/40" + /> +
+ +
+ + handleExcludePatternsChange(e.target.value)} + placeholder="node_modules, .git, .DS_Store" + className="border-slate-400/20 focus-visible:border-slate-400/40 font-mono" + /> +

+ Comma-separated patterns of directories/files to exclude. +

+
+ +
+ + handleFileExtensionsChange(e.target.value)} + placeholder=".md, .txt, .rst" + className="border-slate-400/20 focus-visible:border-slate-400/40 font-mono" + /> +

+ Leave empty to index all supported files. +

+
+
+
+
+ ); +}; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx index a63435260..3dc1891c8 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx @@ -19,6 +19,7 @@ import { JiraConfig } from "./components/jira-config"; import { LinkupApiConfig } from "./components/linkup-api-config"; import { LumaConfig } from "./components/luma-config"; import { MCPConfig } from "./components/mcp-config"; +import { LocalFolderConfig } from "./components/local-folder-config"; import { ObsidianConfig } from "./components/obsidian-config"; import { OneDriveConfig } from "./components/onedrive-config"; import { SlackConfig } from "./components/slack-config"; @@ -82,6 +83,8 @@ export function getConnectorConfigComponent( return MCPConfig; case "OBSIDIAN_CONNECTOR": return ObsidianConfig; + case "LOCAL_FOLDER_CONNECTOR": + return LocalFolderConfig; case "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": return ComposioDriveConfig; case "COMPOSIO_GMAIL_CONNECTOR": diff --git a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts index f924bb15f..dd5978002 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts @@ -29,6 +29,7 @@ export const CONNECTOR_TO_DOCUMENT_TYPE: Record = { BOOKSTACK_CONNECTOR: "BOOKSTACK_CONNECTOR", CIRCLEBACK_CONNECTOR: "CIRCLEBACK", OBSIDIAN_CONNECTOR: "OBSIDIAN_CONNECTOR", + LOCAL_FOLDER_CONNECTOR: "LOCAL_FOLDER_FILE", // Special mappings (connector type differs from document type) GOOGLE_DRIVE_CONNECTOR: "GOOGLE_DRIVE_FILE", diff --git a/surfsense_web/components/documents/version-history.tsx b/surfsense_web/components/documents/version-history.tsx new file mode 100644 index 000000000..29740e079 --- /dev/null +++ b/surfsense_web/components/documents/version-history.tsx @@ -0,0 +1,185 @@ +"use client"; + +import { useCallback, useEffect, useState } from "react"; +import { Clock, RotateCcw } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { + Sheet, + SheetContent, + SheetHeader, + SheetTitle, + SheetTrigger, +} from "@/components/ui/sheet"; +import { Spinner } from "@/components/ui/spinner"; +import { documentsApiService } from "@/lib/apis/documents-api.service"; +import { toast } from "sonner"; + +interface DocumentVersionSummary { + version_number: number; + title: string; + content_hash: string; + created_at: string | null; +} + +interface VersionHistoryProps { + documentId: number; + documentType: string; +} + +export function VersionHistoryButton({ documentId, documentType }: VersionHistoryProps) { + const showVersionHistory = documentType === "LOCAL_FOLDER_FILE" || documentType === "OBSIDIAN_CONNECTOR"; + if (!showVersionHistory) return null; + + return ( + + + + + + + Version History + + + + + ); +} + +function VersionHistoryPanel({ documentId }: { documentId: number }) { + const [versions, setVersions] = useState([]); + const [loading, setLoading] = useState(true); + const [selectedVersion, setSelectedVersion] = useState(null); + const [versionContent, setVersionContent] = useState(""); + const [contentLoading, setContentLoading] = useState(false); + const [restoring, setRestoring] = useState(false); + + const loadVersions = useCallback(async () => { + setLoading(true); + try { + const data = await documentsApiService.listDocumentVersions(documentId); + setVersions(data as DocumentVersionSummary[]); + } catch { + toast.error("Failed to load version history"); + } finally { + setLoading(false); + } + }, [documentId]); + + useEffect(() => { + loadVersions(); + }, [loadVersions]); + + const handleSelectVersion = async (versionNumber: number) => { + setSelectedVersion(versionNumber); + setContentLoading(true); + try { + const data = (await documentsApiService.getDocumentVersion( + documentId, + versionNumber + )) as { source_markdown: string }; + setVersionContent(data.source_markdown || ""); + } catch { + toast.error("Failed to load version content"); + } finally { + setContentLoading(false); + } + }; + + const handleRestore = async (versionNumber: number) => { + setRestoring(true); + try { + await documentsApiService.restoreDocumentVersion(documentId, versionNumber); + toast.success(`Restored version ${versionNumber}`); + await loadVersions(); + } catch { + toast.error("Failed to restore version"); + } finally { + setRestoring(false); + } + }; + + if (loading) { + return ( +
+ +
+ ); + } + + if (versions.length === 0) { + return ( +
+ +

No version history available yet.

+

Versions are created when file content changes.

+
+ ); + } + + return ( +
+
+ {versions.map((v) => ( +
handleSelectVersion(v.version_number)} + > +
+
+

Version {v.version_number}

+ {v.created_at && ( +

+ {new Date(v.created_at).toLocaleString()} +

+ )} + {v.title && ( +

+ {v.title} +

+ )} +
+ +
+
+ ))} +
+ + {selectedVersion !== null && ( +
+

+ Preview — Version {selectedVersion} +

+ {contentLoading ? ( +
+ +
+ ) : ( +
+							{versionContent || "(empty)"}
+						
+ )} +
+ )} +
+ ); +} diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts index 1a3326bae..c663d6115 100644 --- a/surfsense_web/contracts/types/document.types.ts +++ b/surfsense_web/contracts/types/document.types.ts @@ -26,6 +26,7 @@ export const documentTypeEnum = z.enum([ "BOOKSTACK_CONNECTOR", "CIRCLEBACK", "OBSIDIAN_CONNECTOR", + "LOCAL_FOLDER_FILE", "SURFSENSE_DOCS", "NOTE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR", diff --git a/surfsense_web/hooks/use-folder-sync.ts b/surfsense_web/hooks/use-folder-sync.ts new file mode 100644 index 000000000..a35faf98f --- /dev/null +++ b/surfsense_web/hooks/use-folder-sync.ts @@ -0,0 +1,41 @@ +"use client"; + +import { useEffect, useRef } from "react"; +import { connectorsApiService } from "@/lib/apis/connectors-api.service"; + +const DEBOUNCE_MS = 2000; + +export function useFolderSync() { + const pendingRef = useRef>>(new Map()); + + useEffect(() => { + const api = typeof window !== "undefined" ? window.electronAPI : null; + if (!api?.onFileChanged) return; + + const cleanup = api.onFileChanged((event) => { + const key = `${event.connectorId}:${event.fullPath}`; + + const existing = pendingRef.current.get(key); + if (existing) clearTimeout(existing); + + const timeout = setTimeout(async () => { + pendingRef.current.delete(key); + try { + await connectorsApiService.indexFile(event.connectorId, event.fullPath); + } catch (err) { + console.error("[FolderSync] Failed to trigger re-index:", err); + } + }, DEBOUNCE_MS); + + pendingRef.current.set(key, timeout); + }); + + return () => { + cleanup(); + for (const timeout of pendingRef.current.values()) { + clearTimeout(timeout); + } + pendingRef.current.clear(); + }; + }, []); +} diff --git a/surfsense_web/lib/apis/connectors-api.service.ts b/surfsense_web/lib/apis/connectors-api.service.ts index abd16c7a7..f2722df70 100644 --- a/surfsense_web/lib/apis/connectors-api.service.ts +++ b/surfsense_web/lib/apis/connectors-api.service.ts @@ -404,6 +404,18 @@ class ConnectorsApiService { listDiscordChannelsResponse ); }; + + // ============================================================================= + // Local Folder Connector Methods + // ============================================================================= + + indexFile = async (connectorId: number, filePath: string) => { + return baseApiService.post( + `/api/v1/search-source-connectors/${connectorId}/index-file`, + undefined, + { body: { file_path: filePath } } + ); + }; } export type { SlackChannel, DiscordChannel }; diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index 14a247032..d4a80f8a0 100644 --- a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -379,6 +379,22 @@ class DocumentsApiService { }); }; + listDocumentVersions = async (documentId: number) => { + return baseApiService.get(`/api/v1/documents/${documentId}/versions`); + }; + + getDocumentVersion = async (documentId: number, versionNumber: number) => { + return baseApiService.get( + `/api/v1/documents/${documentId}/versions/${versionNumber}` + ); + }; + + restoreDocumentVersion = async (documentId: number, versionNumber: number) => { + return baseApiService.post( + `/api/v1/documents/${documentId}/versions/${versionNumber}/restore` + ); + }; + /** * Delete a document */ diff --git a/surfsense_web/lib/connectors/utils.ts b/surfsense_web/lib/connectors/utils.ts index 90f7f5d21..6ce78be67 100644 --- a/surfsense_web/lib/connectors/utils.ts +++ b/surfsense_web/lib/connectors/utils.ts @@ -30,6 +30,7 @@ export const getConnectorTypeDisplay = (type: string): string => { YOUTUBE_CONNECTOR: "YouTube", CIRCLEBACK_CONNECTOR: "Circleback", OBSIDIAN_CONNECTOR: "Obsidian", + LOCAL_FOLDER_CONNECTOR: "Local Folder", DROPBOX_CONNECTOR: "Dropbox", MCP_CONNECTOR: "MCP Server", }; diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index 9cf1aa596..921449b41 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -1,5 +1,30 @@ import type { PostHog } from "posthog-js"; +interface WatchedFolderConfig { + path: string; + name: string; + excludePatterns: string[]; + fileExtensions: string[] | null; + connectorId: number; + searchSpaceId: number; + active: boolean; +} + +interface FolderSyncFileChangedEvent { + connectorId: number; + searchSpaceId: number; + folderPath: string; + relativePath: string; + fullPath: string; + action: "add" | "change" | "unlink"; + timestamp: number; +} + +interface FolderSyncWatcherReadyEvent { + connectorId: number; + folderPath: string; +} + interface ElectronAPI { versions: { electron: string; @@ -14,6 +39,16 @@ interface ElectronAPI { setQuickAskMode: (mode: string) => Promise; getQuickAskMode: () => Promise; replaceText: (text: string) => Promise; + // Folder sync + selectFolder: () => Promise; + addWatchedFolder: (config: WatchedFolderConfig) => Promise; + removeWatchedFolder: (folderPath: string) => Promise; + getWatchedFolders: () => Promise; + getWatcherStatus: () => Promise<{ path: string; active: boolean; watching: boolean }[]>; + onFileChanged: (callback: (data: FolderSyncFileChangedEvent) => void) => () => void; + onWatcherReady: (callback: (data: FolderSyncWatcherReadyEvent) => void) => () => void; + pauseWatcher: () => Promise; + resumeWatcher: () => Promise; } declare global { From b93da843dc6125434d712ad8881bf248906782d1 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 11:55:29 +0530 Subject: [PATCH 19/61] feat: implement mtime tracking and synchronization for folder watcher --- .../src/modules/folder-watcher.ts | 172 +++++++++++++++++- 1 file changed, 164 insertions(+), 8 deletions(-) diff --git a/surfsense_desktop/src/modules/folder-watcher.ts b/surfsense_desktop/src/modules/folder-watcher.ts index bfd2136c9..072ae7b3f 100644 --- a/surfsense_desktop/src/modules/folder-watcher.ts +++ b/surfsense_desktop/src/modules/folder-watcher.ts @@ -1,5 +1,5 @@ import { BrowserWindow, dialog } from 'electron'; -import chokidar from 'chokidar'; +import chokidar, { type FSWatcher } from 'chokidar'; import * as path from 'path'; import * as fs from 'fs'; import { IPC_CHANNELS } from '../ipc/channels'; @@ -16,13 +16,24 @@ export interface WatchedFolderConfig { interface WatcherEntry { config: WatchedFolderConfig; - watcher: chokidar.FSWatcher | null; + watcher: FSWatcher | null; } +type MtimeMap = Record; + const STORE_KEY = 'watchedFolders'; +const MTIME_TOLERANCE_S = 1.0; + let store: any = null; +let mtimeStore: any = null; let watchers: Map = new Map(); +/** + * In-memory cache of mtime maps, keyed by folder path. + * Persisted to electron-store on mutation. + */ +const mtimeMaps: Map = new Map(); + async function getStore() { if (!store) { const { default: Store } = await import('electron-store'); @@ -36,6 +47,73 @@ async function getStore() { return store; } +async function getMtimeStore() { + if (!mtimeStore) { + const { default: Store } = await import('electron-store'); + mtimeStore = new Store({ + name: 'folder-mtime-maps', + defaults: {} as Record, + }); + } + return mtimeStore; +} + +function loadMtimeMap(folderPath: string): MtimeMap { + return mtimeMaps.get(folderPath) ?? {}; +} + +function persistMtimeMap(folderPath: string) { + const map = mtimeMaps.get(folderPath) ?? {}; + getMtimeStore().then((s) => s.set(folderPath, map)); +} + +function walkFolderMtimes(config: WatchedFolderConfig): MtimeMap { + const root = config.path; + const result: MtimeMap = {}; + const excludes = new Set(config.excludePatterns); + + function walk(dir: string) { + let entries: fs.Dirent[]; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + + for (const entry of entries) { + const name = entry.name; + + // Skip dotfiles/dotdirs and excluded names + if (name.startsWith('.') || excludes.has(name)) continue; + + const full = path.join(dir, name); + + if (entry.isDirectory()) { + walk(full); + } else if (entry.isFile()) { + if ( + config.fileExtensions && + config.fileExtensions.length > 0 + ) { + const ext = path.extname(name).toLowerCase(); + if (!config.fileExtensions.includes(ext)) continue; + } + + try { + const stat = fs.statSync(full); + const rel = path.relative(root, full); + result[rel] = stat.mtimeMs; + } catch { + // File may have been removed between readdir and stat + } + } + } + } + + walk(root); + return result; +} + function getMainWindow(): BrowserWindow | null { const windows = BrowserWindow.getAllWindows(); return windows.length > 0 ? windows[0] : null; @@ -48,11 +126,16 @@ function sendToRenderer(channel: string, data: any) { } } -function startWatcher(config: WatchedFolderConfig) { +async function startWatcher(config: WatchedFolderConfig) { if (watchers.has(config.path)) { return; } + // Load persisted mtime map into memory before starting the watcher + const ms = await getMtimeStore(); + const storedMap: MtimeMap = ms.get(config.path) ?? {}; + mtimeMaps.set(config.path, { ...storedMap }); + const ignored = [ /(^|[/\\])\../, // dotfiles by default ...config.excludePatterns.map((p) => `**/${p}/**`), @@ -60,7 +143,7 @@ function startWatcher(config: WatchedFolderConfig) { const watcher = chokidar.watch(config.path, { persistent: true, - ignoreInitial: false, + ignoreInitial: true, awaitWriteFinish: { stabilityThreshold: 500, pollInterval: 100, @@ -72,6 +155,58 @@ function startWatcher(config: WatchedFolderConfig) { watcher.on('ready', () => { ready = true; + + // Detect offline changes by diffing current filesystem against stored mtime map + const currentMap = walkFolderMtimes(config); + const storedSnapshot = loadMtimeMap(config.path); + const now = Date.now(); + + for (const [rel, currentMtime] of Object.entries(currentMap)) { + const storedMtime = storedSnapshot[rel]; + if (storedMtime === undefined) { + // New file added while app was closed + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, { + connectorId: config.connectorId, + searchSpaceId: config.searchSpaceId, + folderPath: config.path, + relativePath: rel, + fullPath: path.join(config.path, rel), + action: 'add', + timestamp: now, + }); + } else if (Math.abs(currentMtime - storedMtime) >= MTIME_TOLERANCE_S * 1000) { + // File modified while app was closed + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, { + connectorId: config.connectorId, + searchSpaceId: config.searchSpaceId, + folderPath: config.path, + relativePath: rel, + fullPath: path.join(config.path, rel), + action: 'change', + timestamp: now, + }); + } + } + + for (const rel of Object.keys(storedSnapshot)) { + if (!(rel in currentMap)) { + // File deleted while app was closed + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, { + connectorId: config.connectorId, + searchSpaceId: config.searchSpaceId, + folderPath: config.path, + relativePath: rel, + fullPath: path.join(config.path, rel), + action: 'unlink', + timestamp: now, + }); + } + } + + // Replace stored map with current filesystem state + mtimeMaps.set(config.path, currentMap); + persistMtimeMap(config.path); + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_WATCHER_READY, { connectorId: config.connectorId, folderPath: config.path, @@ -91,6 +226,21 @@ function startWatcher(config: WatchedFolderConfig) { if (!config.fileExtensions.includes(ext)) return; } + // Keep mtime map in sync with live changes + const map = mtimeMaps.get(config.path); + if (map) { + if (action === 'unlink') { + delete map[relativePath]; + } else { + try { + map[relativePath] = fs.statSync(filePath).mtimeMs; + } catch { + // File may have been removed between event and stat + } + } + persistMtimeMap(config.path); + } + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, { connectorId: config.connectorId, searchSpaceId: config.searchSpaceId, @@ -110,6 +260,7 @@ function startWatcher(config: WatchedFolderConfig) { } function stopWatcher(folderPath: string) { + persistMtimeMap(folderPath); const entry = watchers.get(folderPath); if (entry?.watcher) { entry.watcher.close(); @@ -144,7 +295,7 @@ export async function addWatchedFolder( s.set(STORE_KEY, folders); if (config.active) { - startWatcher(config); + await startWatcher(config); } return folders; @@ -160,6 +311,11 @@ export async function removeWatchedFolder( stopWatcher(folderPath); + // Clean up persisted mtime map for this folder + mtimeMaps.delete(folderPath); + const ms = await getMtimeStore(); + ms.delete(folderPath); + return updated; } @@ -190,9 +346,9 @@ export async function pauseWatcher(): Promise { } export async function resumeWatcher(): Promise { - for (const [folderPath, entry] of watchers) { + for (const [, entry] of watchers) { if (!entry.watcher && entry.config.active) { - startWatcher(entry.config); + await startWatcher(entry.config); } } } @@ -203,7 +359,7 @@ export async function registerFolderWatcher(): Promise { for (const config of folders) { if (config.active && fs.existsSync(config.path)) { - startWatcher(config); + await startWatcher(config); } } } From 543b8b9376eb4fd89698deffdfde0f6d9e69a2e8 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 12:08:18 +0530 Subject: [PATCH 20/61] feat: add real-time folder watcher registration and unregistration for Local Folder connector --- .../hooks/use-connector-dialog.ts | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts index 6543bbd72..2404b8eb5 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts @@ -586,6 +586,23 @@ export const useConnectorDialog = () => { }, }); + // Register folder watcher in Electron for real-time sync + if ( + currentConnectorType === EnumConnectorName.LOCAL_FOLDER_CONNECTOR && + window.electronAPI?.addWatchedFolder + ) { + const cfg = connector.config || {}; + await window.electronAPI.addWatchedFolder({ + path: cfg.folder_path as string, + name: cfg.folder_name as string, + excludePatterns: (cfg.exclude_patterns as string[]) || [], + fileExtensions: (cfg.file_extensions as string[] | null) ?? null, + connectorId: connector.id, + searchSpaceId: Number(searchSpaceId), + active: true, + }); + } + const successMessage = currentConnectorType === "MCP_CONNECTOR" ? `${connector.name} added successfully` @@ -1190,6 +1207,17 @@ export const useConnectorDialog = () => { id: editingConnector.id, }); + // Unregister folder watcher in Electron when removing a Local Folder connector + if ( + editingConnector.connector_type === EnumConnectorName.LOCAL_FOLDER_CONNECTOR && + window.electronAPI?.removeWatchedFolder && + editingConnector.config?.folder_path + ) { + await window.electronAPI.removeWatchedFolder( + editingConnector.config.folder_path as string + ); + } + // Track connector deleted event trackConnectorDeleted( Number(searchSpaceId), From 8e58094a861a1517e6587a8e8f55182c3cffed7b Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 12:13:14 +0530 Subject: [PATCH 21/61] refactor: update permission checks in document and connector routes to use specific permission values --- surfsense_backend/app/routes/documents_routes.py | 6 +++--- .../app/routes/search_source_connectors_routes.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 9271d4630..2d999eae3 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -1156,7 +1156,7 @@ async def list_document_versions( if not document: raise HTTPException(status_code=404, detail="Document not found") - await check_permission(session, user, document.search_space_id, Permission.READ) + await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_READ.value) versions = ( await session.execute( @@ -1191,7 +1191,7 @@ async def get_document_version( if not document: raise HTTPException(status_code=404, detail="Document not found") - await check_permission(session, user, document.search_space_id, Permission.READ) + await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_READ.value) version = ( await session.execute( @@ -1229,7 +1229,7 @@ async def restore_document_version( if not document: raise HTTPException(status_code=404, detail="Document not found") - await check_permission(session, user, document.search_space_id, Permission.WRITE) + await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_UPDATE.value) version = ( await session.execute( diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 439d83ac1..5ea88c418 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -1355,7 +1355,7 @@ async def index_single_file( if not connector: raise HTTPException(status_code=404, detail="Local folder connector not found") - await check_permission(session, user, connector.search_space_id, Permission.WRITE) + await check_permission(session, user, connector.search_space_id, Permission.CONNECTORS_UPDATE.value) folder_path = connector.config.get("folder_path", "") From 40ade4889e23b18ed43eaaa39a0489483fa5bbdc Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 21:01:31 +0530 Subject: [PATCH 22/61] feat: add LOCAL_FOLDER_FILE document type and update document_versions table management --- ...d_local_folder_connector_and_versioning.py | 33 +++++++------------ 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/surfsense_backend/alembic/versions/117_add_local_folder_connector_and_versioning.py b/surfsense_backend/alembic/versions/117_add_local_folder_connector_and_versioning.py index e97a4787c..a9da3beb4 100644 --- a/surfsense_backend/alembic/versions/117_add_local_folder_connector_and_versioning.py +++ b/surfsense_backend/alembic/versions/117_add_local_folder_connector_and_versioning.py @@ -1,4 +1,4 @@ -"""Add local folder connector enums and document_versions table +"""Add LOCAL_FOLDER_FILE document type and document_versions table Revision ID: 117 Revises: 116 @@ -21,23 +21,6 @@ PUBLICATION_NAME = "zero_publication" def upgrade() -> None: conn = op.get_bind() - # Add LOCAL_FOLDER_CONNECTOR to searchsourceconnectortype enum - op.execute( - """ - DO $$ - BEGIN - IF NOT EXISTS ( - SELECT 1 FROM pg_type t - JOIN pg_enum e ON t.oid = e.enumtypid - WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'LOCAL_FOLDER_CONNECTOR' - ) THEN - ALTER TYPE searchsourceconnectortype ADD VALUE 'LOCAL_FOLDER_CONNECTOR'; - END IF; - END - $$; - """ - ) - # Add LOCAL_FOLDER_FILE to documenttype enum op.execute( """ @@ -126,9 +109,17 @@ def downgrade() -> None: {"name": PUBLICATION_NAME}, ).fetchone() if pub_exists: - op.execute( - f"ALTER PUBLICATION {PUBLICATION_NAME} DROP TABLE IF EXISTS document_versions" - ) + already_in_pub = conn.execute( + sa.text( + "SELECT 1 FROM pg_publication_tables " + "WHERE pubname = :name AND tablename = 'document_versions'" + ), + {"name": PUBLICATION_NAME}, + ).fetchone() + if already_in_pub: + op.execute( + f"ALTER PUBLICATION {PUBLICATION_NAME} DROP TABLE document_versions" + ) op.execute("DROP INDEX IF EXISTS ix_document_versions_created_at") op.execute("DROP INDEX IF EXISTS ix_document_versions_document_id") From 1ef0d913e7471c7df6b03b94647064bae76abb39 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 22:19:55 +0530 Subject: [PATCH 23/61] refactor: remove Local Folder connector components and related configurations from the UI --- .../(manage)/components/DocumentsFilters.tsx | 43 ++- .../components/local-folder-connect-form.tsx | 272 ------------------ .../connect-forms/connector-benefits.ts | 8 - .../connector-popup/connect-forms/index.tsx | 3 - .../components/local-folder-config.tsx | 163 ----------- .../connector-configs/index.tsx | 3 - .../views/connector-connect-view.tsx | 1 - .../views/connector-edit-view.tsx | 7 +- .../views/indexing-configuration-view.tsx | 7 +- 9 files changed, 35 insertions(+), 472 deletions(-) delete mode 100644 surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/local-folder-connect-form.tsx delete mode 100644 surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/local-folder-config.tsx diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx index b85af13b7..fcd3a39da 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx @@ -1,6 +1,6 @@ "use client"; -import { FolderPlus, ListFilter, Search, Upload, X } from "lucide-react"; +import { Eye, FolderPlus, ListFilter, Search, Upload, X } from "lucide-react"; import { useTranslations } from "next-intl"; import React, { useCallback, useMemo, useRef, useState } from "react"; import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup"; @@ -19,6 +19,7 @@ export function DocumentsFilters({ onToggleType, activeTypes, onCreateFolder, + onWatchFolder, }: { typeCounts: Partial>; onSearch: (v: string) => void; @@ -26,6 +27,7 @@ export function DocumentsFilters({ onToggleType: (type: DocumentTypeEnum, checked: boolean) => void; activeTypes: DocumentTypeEnum[]; onCreateFolder?: () => void; + onWatchFolder?: () => void; }) { const t = useTranslations("documents"); const id = React.useId(); @@ -214,17 +216,34 @@ export function DocumentsFilters({ )} - {/* Upload Button */} - + {/* Watch Folder Button (desktop only) */} + {onWatchFolder && ( + + + + + Watch folder + + )} + + {/* Upload Button */} +
); diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/local-folder-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/local-folder-connect-form.tsx deleted file mode 100644 index 2e893c1c0..000000000 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/local-folder-connect-form.tsx +++ /dev/null @@ -1,272 +0,0 @@ -"use client"; - -import { zodResolver } from "@hookform/resolvers/zod"; -import { FolderSync, Info } from "lucide-react"; -import type { FC } from "react"; -import { useRef } from "react"; -import { useForm } from "react-hook-form"; -import * as z from "zod"; -import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert"; -import { Button } from "@/components/ui/button"; -import { - Form, - FormControl, - FormDescription, - FormField, - FormItem, - FormLabel, - FormMessage, -} from "@/components/ui/form"; -import { Input } from "@/components/ui/input"; -import { EnumConnectorName } from "@/contracts/enums/connector"; -import { getConnectorBenefits } from "../connector-benefits"; -import type { ConnectFormProps } from "../index"; - -const localFolderFormSchema = z.object({ - name: z.string().min(3, { - message: "Connector name must be at least 3 characters.", - }), - folder_path: z.string().min(1, { - message: "Folder path is required.", - }), - folder_name: z.string().min(1, { - message: "Folder name is required.", - }), - exclude_patterns: z.string().optional(), - file_extensions: z.string().optional(), -}); - -type LocalFolderFormValues = z.infer; - -export const LocalFolderConnectForm: FC = ({ onSubmit, isSubmitting }) => { - const isSubmittingRef = useRef(false); - const isElectron = typeof window !== "undefined" && !!window.electronAPI; - - const form = useForm({ - resolver: zodResolver(localFolderFormSchema), - defaultValues: { - name: "Local Folder", - folder_path: "", - folder_name: "", - exclude_patterns: "node_modules,.git,.DS_Store", - file_extensions: "", - }, - }); - - const handleBrowse = async () => { - if (!isElectron) return; - const selected = await window.electronAPI!.selectFolder(); - if (selected) { - form.setValue("folder_path", selected); - const folderName = selected.split(/[\\/]/).pop() || "folder"; - if (!form.getValues("folder_name")) { - form.setValue("folder_name", folderName); - } - if (form.getValues("name") === "Local Folder") { - form.setValue("name", folderName); - } - } - }; - - const handleSubmit = async (values: LocalFolderFormValues) => { - if (isSubmittingRef.current || isSubmitting) return; - isSubmittingRef.current = true; - - try { - const excludePatterns = values.exclude_patterns - ? values.exclude_patterns - .split(",") - .map((p) => p.trim()) - .filter(Boolean) - : []; - - const fileExtensions = values.file_extensions - ? values.file_extensions - .split(",") - .map((e) => { - const ext = e.trim(); - return ext.startsWith(".") ? ext : `.${ext}`; - }) - .filter(Boolean) - : null; - - await onSubmit({ - name: values.name, - connector_type: EnumConnectorName.LOCAL_FOLDER_CONNECTOR, - config: { - folder_path: values.folder_path, - folder_name: values.folder_name, - exclude_patterns: excludePatterns, - file_extensions: fileExtensions, - }, - is_indexable: true, - is_active: true, - last_indexed_at: null, - periodic_indexing_enabled: false, - indexing_frequency_minutes: null, - next_scheduled_at: null, - }); - } finally { - isSubmittingRef.current = false; - } - }; - - return ( -
- - - Desktop App Required - - Real-time file watching is powered by the SurfSense desktop app. Files are - automatically synced whenever changes are detected. - - - -
-
- - ( - - Connector Name - - - - - - )} - /> - - ( - - Folder Path -
- - - - {isElectron && ( - - )} -
- - The absolute path to the folder to watch and sync. - - -
- )} - /> - - ( - - Display Name - - - - - A friendly name shown in the documents sidebar. - - - - )} - /> - - ( - - Exclude Patterns - - - - - Comma-separated patterns of directories/files to exclude. - - - - )} - /> - - ( - - File Extensions (optional) - - - - - Leave empty to index all supported files, or specify comma-separated extensions. - - - - )} - /> - - - -
- - {getConnectorBenefits(EnumConnectorName.LOCAL_FOLDER_CONNECTOR) && ( -
-

- What you get with Local Folder sync: -

-
    - {getConnectorBenefits(EnumConnectorName.LOCAL_FOLDER_CONNECTOR)?.map( - (benefit) =>
  • {benefit}
  • - )} -
-
- )} -
- ); -}; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts index 40c6a7fdd..0dc093100 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts @@ -111,14 +111,6 @@ export function getConnectorBenefits(connectorType: string): string[] | null { "Incremental sync - only changed files are re-indexed", "Full support for your vault's folder structure", ], - LOCAL_FOLDER_CONNECTOR: [ - "Watch local folders for real-time changes via the desktop app", - "Automatic change detection — only modified files are re-indexed", - "Version history with up to 20 snapshots per document", - "Mirrors your folder structure in the SurfSense sidebar", - "Supports any text-based file format", - "Works as a periodic sync fallback when the desktop app is not running", - ], }; return benefits[connectorType] || null; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx index 116893399..b6d813748 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx @@ -7,7 +7,6 @@ import { GithubConnectForm } from "./components/github-connect-form"; import { LinkupApiConnectForm } from "./components/linkup-api-connect-form"; import { LumaConnectForm } from "./components/luma-connect-form"; import { MCPConnectForm } from "./components/mcp-connect-form"; -import { LocalFolderConnectForm } from "./components/local-folder-connect-form"; import { ObsidianConnectForm } from "./components/obsidian-connect-form"; import { TavilyApiConnectForm } from "./components/tavily-api-connect-form"; @@ -59,8 +58,6 @@ export function getConnectFormComponent(connectorType: string): ConnectFormCompo return MCPConnectForm; case "OBSIDIAN_CONNECTOR": return ObsidianConnectForm; - case "LOCAL_FOLDER_CONNECTOR": - return LocalFolderConnectForm; default: return null; } diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/local-folder-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/local-folder-config.tsx deleted file mode 100644 index cb4295079..000000000 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/local-folder-config.tsx +++ /dev/null @@ -1,163 +0,0 @@ -"use client"; - -import type { FC } from "react"; -import { useState } from "react"; -import { FolderSync } from "lucide-react"; -import { Button } from "@/components/ui/button"; -import { Input } from "@/components/ui/input"; -import { Label } from "@/components/ui/label"; -import type { ConnectorConfigProps } from "../index"; - -export const LocalFolderConfig: FC = ({ - connector, - onConfigChange, - onNameChange, -}) => { - const isElectron = typeof window !== "undefined" && !!window.electronAPI; - - const [folderPath, setFolderPath] = useState( - (connector.config?.folder_path as string) || "" - ); - const [folderName, setFolderName] = useState( - (connector.config?.folder_name as string) || "" - ); - const [excludePatterns, setExcludePatterns] = useState(() => { - const patterns = connector.config?.exclude_patterns; - if (Array.isArray(patterns)) { - return patterns.join(", "); - } - return (patterns as string) || "node_modules, .git, .DS_Store"; - }); - const [fileExtensions, setFileExtensions] = useState(() => { - const exts = connector.config?.file_extensions; - if (Array.isArray(exts)) { - return exts.join(", "); - } - return (exts as string) || ""; - }); - const [name, setName] = useState(connector.name || ""); - - const handleFolderPathChange = (value: string) => { - setFolderPath(value); - onConfigChange?.({ ...connector.config, folder_path: value }); - }; - - const handleFolderNameChange = (value: string) => { - setFolderName(value); - onConfigChange?.({ ...connector.config, folder_name: value }); - }; - - const handleExcludePatternsChange = (value: string) => { - setExcludePatterns(value); - const arr = value - .split(",") - .map((p) => p.trim()) - .filter(Boolean); - onConfigChange?.({ ...connector.config, exclude_patterns: arr }); - }; - - const handleFileExtensionsChange = (value: string) => { - setFileExtensions(value); - const arr = value - ? value - .split(",") - .map((e) => { - const ext = e.trim(); - return ext.startsWith(".") ? ext : `.${ext}`; - }) - .filter(Boolean) - : null; - onConfigChange?.({ ...connector.config, file_extensions: arr }); - }; - - const handleNameChange = (value: string) => { - setName(value); - onNameChange?.(value); - }; - - const handleBrowse = async () => { - if (!isElectron) return; - const selected = await window.electronAPI!.selectFolder(); - if (selected) { - handleFolderPathChange(selected); - const autoName = selected.split(/[\\/]/).pop() || "folder"; - if (!folderName) handleFolderNameChange(autoName); - } - }; - - return ( -
-
-
- - handleNameChange(e.target.value)} - placeholder="Local Folder" - className="border-slate-400/20 focus-visible:border-slate-400/40" - /> -
-
- -
-

Folder Configuration

- -
-
- -
- handleFolderPathChange(e.target.value)} - placeholder="/path/to/your/folder" - className="border-slate-400/20 focus-visible:border-slate-400/40 font-mono flex-1" - /> - {isElectron && ( - - )} -
-
- -
- - handleFolderNameChange(e.target.value)} - placeholder="My Notes" - className="border-slate-400/20 focus-visible:border-slate-400/40" - /> -
- -
- - handleExcludePatternsChange(e.target.value)} - placeholder="node_modules, .git, .DS_Store" - className="border-slate-400/20 focus-visible:border-slate-400/40 font-mono" - /> -

- Comma-separated patterns of directories/files to exclude. -

-
- -
- - handleFileExtensionsChange(e.target.value)} - placeholder=".md, .txt, .rst" - className="border-slate-400/20 focus-visible:border-slate-400/40 font-mono" - /> -

- Leave empty to index all supported files. -

-
-
-
-
- ); -}; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx index 3dc1891c8..a63435260 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx @@ -19,7 +19,6 @@ import { JiraConfig } from "./components/jira-config"; import { LinkupApiConfig } from "./components/linkup-api-config"; import { LumaConfig } from "./components/luma-config"; import { MCPConfig } from "./components/mcp-config"; -import { LocalFolderConfig } from "./components/local-folder-config"; import { ObsidianConfig } from "./components/obsidian-config"; import { OneDriveConfig } from "./components/onedrive-config"; import { SlackConfig } from "./components/slack-config"; @@ -83,8 +82,6 @@ export function getConnectorConfigComponent( return MCPConfig; case "OBSIDIAN_CONNECTOR": return ObsidianConfig; - case "LOCAL_FOLDER_CONNECTOR": - return LocalFolderConfig; case "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": return ComposioDriveConfig; case "COMPOSIO_GMAIL_CONNECTOR": diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx index 0b6d0917a..596b98e93 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx @@ -20,7 +20,6 @@ const FORM_ID_MAP: Record = { CIRCLEBACK_CONNECTOR: "circleback-connect-form", MCP_CONNECTOR: "mcp-connect-form", OBSIDIAN_CONNECTOR: "obsidian-connect-form", - LOCAL_FOLDER_CONNECTOR: "local-folder-connect-form", }; interface ConnectorConnectViewProps { diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx index dcedb4743..05d42adcb 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx @@ -278,8 +278,7 @@ export const ConnectorEditView: FC = ({ connector.connector_type !== "DROPBOX_CONNECTOR" && connector.connector_type !== "ONEDRIVE_CONNECTOR" && connector.connector_type !== "WEBCRAWLER_CONNECTOR" && - connector.connector_type !== "GITHUB_CONNECTOR" && - connector.connector_type !== "LOCAL_FOLDER_CONNECTOR" && ( + connector.connector_type !== "GITHUB_CONNECTOR" && ( = ({ /> )} - {/* Periodic sync - shown for all indexable connectors except Local Folder */} - {connector.connector_type !== "LOCAL_FOLDER_CONNECTOR" && - (() => { + {(() => { const isGoogleDrive = connector.connector_type === "GOOGLE_DRIVE_CONNECTOR"; const isComposioGoogleDrive = connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx index 436ce7843..e583cbe17 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx @@ -164,8 +164,7 @@ export const IndexingConfigurationView: FC = ({ config.connectorType !== "DROPBOX_CONNECTOR" && config.connectorType !== "ONEDRIVE_CONNECTOR" && config.connectorType !== "WEBCRAWLER_CONNECTOR" && - config.connectorType !== "GITHUB_CONNECTOR" && - config.connectorType !== "LOCAL_FOLDER_CONNECTOR" && ( + config.connectorType !== "GITHUB_CONNECTOR" && ( = ({ /> )} - {/* Periodic sync - not shown for file-based connectors (Drive, Dropbox, OneDrive) or Local Folder in initial setup; configured in edit view instead */} {config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" && config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && config.connectorType !== "DROPBOX_CONNECTOR" && - config.connectorType !== "ONEDRIVE_CONNECTOR" && - config.connectorType !== "LOCAL_FOLDER_CONNECTOR" && ( + config.connectorType !== "ONEDRIVE_CONNECTOR" && ( Date: Thu, 2 Apr 2026 22:20:11 +0530 Subject: [PATCH 24/61] feat: add renderer readiness signaling and update IPC channels for folder sync --- surfsense_desktop/src/ipc/channels.ts | 1 + surfsense_desktop/src/ipc/handlers.ts | 5 ++ .../src/modules/folder-watcher.ts | 61 +++++++++++++------ surfsense_desktop/src/preload.ts | 1 + 4 files changed, 48 insertions(+), 20 deletions(-) diff --git a/surfsense_desktop/src/ipc/channels.ts b/surfsense_desktop/src/ipc/channels.ts index 362d3362d..66788d90e 100644 --- a/surfsense_desktop/src/ipc/channels.ts +++ b/surfsense_desktop/src/ipc/channels.ts @@ -16,4 +16,5 @@ export const IPC_CHANNELS = { FOLDER_SYNC_WATCHER_READY: 'folder-sync:watcher-ready', FOLDER_SYNC_PAUSE: 'folder-sync:pause', FOLDER_SYNC_RESUME: 'folder-sync:resume', + FOLDER_SYNC_RENDERER_READY: 'folder-sync:renderer-ready', } as const; diff --git a/surfsense_desktop/src/ipc/handlers.ts b/surfsense_desktop/src/ipc/handlers.ts index 2baf957b0..19051e871 100644 --- a/surfsense_desktop/src/ipc/handlers.ts +++ b/surfsense_desktop/src/ipc/handlers.ts @@ -8,6 +8,7 @@ import { getWatcherStatus, pauseWatcher, resumeWatcher, + markRendererReady, } from '../modules/folder-watcher'; export function registerIpcHandlers(): void { @@ -44,4 +45,8 @@ export function registerIpcHandlers(): void { ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_PAUSE, () => pauseWatcher()); ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_RESUME, () => resumeWatcher()); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_RENDERER_READY, () => { + markRendererReady(); + }); } diff --git a/surfsense_desktop/src/modules/folder-watcher.ts b/surfsense_desktop/src/modules/folder-watcher.ts index 072ae7b3f..81a835c22 100644 --- a/surfsense_desktop/src/modules/folder-watcher.ts +++ b/surfsense_desktop/src/modules/folder-watcher.ts @@ -9,7 +9,7 @@ export interface WatchedFolderConfig { name: string; excludePatterns: string[]; fileExtensions: string[] | null; - connectorId: number; + rootFolderId: number | null; searchSpaceId: number; active: boolean; } @@ -34,6 +34,25 @@ let watchers: Map = new Map(); */ const mtimeMaps: Map = new Map(); +let rendererReady = false; +const pendingEvents: any[] = []; + +export function markRendererReady() { + rendererReady = true; + for (const event of pendingEvents) { + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, event); + } + pendingEvents.length = 0; +} + +function sendFileChangedEvent(data: any) { + if (rendererReady) { + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, data); + } else { + pendingEvents.push(data); + } +} + async function getStore() { if (!store) { const { default: Store } = await import('electron-store'); @@ -83,7 +102,6 @@ function walkFolderMtimes(config: WatchedFolderConfig): MtimeMap { for (const entry of entries) { const name = entry.name; - // Skip dotfiles/dotdirs and excluded names if (name.startsWith('.') || excludes.has(name)) continue; const full = path.join(dir, name); @@ -131,7 +149,6 @@ async function startWatcher(config: WatchedFolderConfig) { return; } - // Load persisted mtime map into memory before starting the watcher const ms = await getMtimeStore(); const storedMap: MtimeMap = ms.get(config.path) ?? {}; mtimeMaps.set(config.path, { ...storedMap }); @@ -156,45 +173,49 @@ async function startWatcher(config: WatchedFolderConfig) { watcher.on('ready', () => { ready = true; - // Detect offline changes by diffing current filesystem against stored mtime map const currentMap = walkFolderMtimes(config); const storedSnapshot = loadMtimeMap(config.path); const now = Date.now(); + // Track which files are unchanged so we can selectively update the mtime map + const unchangedMap: MtimeMap = {}; + for (const [rel, currentMtime] of Object.entries(currentMap)) { const storedMtime = storedSnapshot[rel]; if (storedMtime === undefined) { - // New file added while app was closed - sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, { - connectorId: config.connectorId, + sendFileChangedEvent({ + rootFolderId: config.rootFolderId, searchSpaceId: config.searchSpaceId, folderPath: config.path, + folderName: config.name, relativePath: rel, fullPath: path.join(config.path, rel), action: 'add', timestamp: now, }); } else if (Math.abs(currentMtime - storedMtime) >= MTIME_TOLERANCE_S * 1000) { - // File modified while app was closed - sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, { - connectorId: config.connectorId, + sendFileChangedEvent({ + rootFolderId: config.rootFolderId, searchSpaceId: config.searchSpaceId, folderPath: config.path, + folderName: config.name, relativePath: rel, fullPath: path.join(config.path, rel), action: 'change', timestamp: now, }); + } else { + unchangedMap[rel] = currentMtime; } } for (const rel of Object.keys(storedSnapshot)) { if (!(rel in currentMap)) { - // File deleted while app was closed - sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, { - connectorId: config.connectorId, + sendFileChangedEvent({ + rootFolderId: config.rootFolderId, searchSpaceId: config.searchSpaceId, folderPath: config.path, + folderName: config.name, relativePath: rel, fullPath: path.join(config.path, rel), action: 'unlink', @@ -203,12 +224,13 @@ async function startWatcher(config: WatchedFolderConfig) { } } - // Replace stored map with current filesystem state - mtimeMaps.set(config.path, currentMap); + // Only update the mtime map for unchanged files; changed files keep their + // stored mtime so they'll be re-detected if the app crashes before indexing. + mtimeMaps.set(config.path, unchangedMap); persistMtimeMap(config.path); sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_WATCHER_READY, { - connectorId: config.connectorId, + rootFolderId: config.rootFolderId, folderPath: config.path, }); }); @@ -226,7 +248,6 @@ async function startWatcher(config: WatchedFolderConfig) { if (!config.fileExtensions.includes(ext)) return; } - // Keep mtime map in sync with live changes const map = mtimeMaps.get(config.path); if (map) { if (action === 'unlink') { @@ -241,10 +262,11 @@ async function startWatcher(config: WatchedFolderConfig) { persistMtimeMap(config.path); } - sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, { - connectorId: config.connectorId, + sendFileChangedEvent({ + rootFolderId: config.rootFolderId, searchSpaceId: config.searchSpaceId, folderPath: config.path, + folderName: config.name, relativePath, fullPath: filePath, action, @@ -311,7 +333,6 @@ export async function removeWatchedFolder( stopWatcher(folderPath); - // Clean up persisted mtime map for this folder mtimeMaps.delete(folderPath); const ms = await getMtimeStore(); ms.delete(folderPath); diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index 8f65aa633..7c190db10 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -44,4 +44,5 @@ contextBridge.exposeInMainWorld('electronAPI', { }, pauseWatcher: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_PAUSE), resumeWatcher: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_RESUME), + signalRendererReady: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_RENDERER_READY), }); From 493d720b891cf6ef478223d2645ba9e4b9504ab6 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 22:21:01 +0530 Subject: [PATCH 25/61] refactor: remove Local Folder connector references and enhance folder management features --- .../constants/connector-constants.ts | 8 - .../hooks/use-connector-dialog.ts | 28 --- .../utils/connector-document-mapping.ts | 1 - .../components/documents/FolderNode.tsx | 139 ++++++++----- .../components/documents/FolderTreeView.tsx | 9 + .../components/editor-panel/editor-panel.tsx | 18 +- .../layout/ui/sidebar/DocumentsSidebar.tsx | 186 ++++++++++++++---- 7 files changed, 257 insertions(+), 132 deletions(-) diff --git a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts index 3f7d90cd8..2e92f637b 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts @@ -184,14 +184,6 @@ export const OTHER_CONNECTORS = [ connectorType: EnumConnectorName.OBSIDIAN_CONNECTOR, selfHostedOnly: true, }, - { - id: "local-folder-connector", - title: "Local Folder", - description: "Watch and sync local folders (desktop only)", - connectorType: EnumConnectorName.LOCAL_FOLDER_CONNECTOR, - selfHostedOnly: true, - desktopOnly: true, - }, ] as const; // Composio Connectors - Individual entries for each supported toolkit diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts index 2404b8eb5..6543bbd72 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts @@ -586,23 +586,6 @@ export const useConnectorDialog = () => { }, }); - // Register folder watcher in Electron for real-time sync - if ( - currentConnectorType === EnumConnectorName.LOCAL_FOLDER_CONNECTOR && - window.electronAPI?.addWatchedFolder - ) { - const cfg = connector.config || {}; - await window.electronAPI.addWatchedFolder({ - path: cfg.folder_path as string, - name: cfg.folder_name as string, - excludePatterns: (cfg.exclude_patterns as string[]) || [], - fileExtensions: (cfg.file_extensions as string[] | null) ?? null, - connectorId: connector.id, - searchSpaceId: Number(searchSpaceId), - active: true, - }); - } - const successMessage = currentConnectorType === "MCP_CONNECTOR" ? `${connector.name} added successfully` @@ -1207,17 +1190,6 @@ export const useConnectorDialog = () => { id: editingConnector.id, }); - // Unregister folder watcher in Electron when removing a Local Folder connector - if ( - editingConnector.connector_type === EnumConnectorName.LOCAL_FOLDER_CONNECTOR && - window.electronAPI?.removeWatchedFolder && - editingConnector.config?.folder_path - ) { - await window.electronAPI.removeWatchedFolder( - editingConnector.config.folder_path as string - ); - } - // Track connector deleted event trackConnectorDeleted( Number(searchSpaceId), diff --git a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts index dd5978002..f924bb15f 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts @@ -29,7 +29,6 @@ export const CONNECTOR_TO_DOCUMENT_TYPE: Record = { BOOKSTACK_CONNECTOR: "BOOKSTACK_CONNECTOR", CIRCLEBACK_CONNECTOR: "CIRCLEBACK", OBSIDIAN_CONNECTOR: "OBSIDIAN_CONNECTOR", - LOCAL_FOLDER_CONNECTOR: "LOCAL_FOLDER_FILE", // Special mappings (connector type differs from document type) GOOGLE_DRIVE_CONNECTOR: "GOOGLE_DRIVE_FILE", diff --git a/surfsense_web/components/documents/FolderNode.tsx b/surfsense_web/components/documents/FolderNode.tsx index 6a36f724f..1521c06fe 100644 --- a/surfsense_web/components/documents/FolderNode.tsx +++ b/surfsense_web/components/documents/FolderNode.tsx @@ -3,12 +3,15 @@ import { ChevronDown, ChevronRight, + Eye, + EyeOff, Folder, FolderOpen, FolderPlus, MoreHorizontal, Move, PenLine, + RefreshCw, Trash2, } from "lucide-react"; import React, { useCallback, useEffect, useRef, useState } from "react"; @@ -70,6 +73,9 @@ interface FolderNodeProps { disabledDropIds?: Set; contextMenuOpen?: boolean; onContextMenuOpenChange?: (open: boolean) => void; + isWatched?: boolean; + onRescan?: (folder: FolderDisplay) => void; + onStopWatching?: (folder: FolderDisplay) => void; } function getDropZone( @@ -107,6 +113,9 @@ export const FolderNode = React.memo(function FolderNode({ disabledDropIds, contextMenuOpen, onContextMenuOpenChange, + isWatched, + onRescan, + onStopWatching, }: FolderNodeProps) { const [renameValue, setRenameValue] = useState(folder.name); const inputRef = useRef(null); @@ -307,73 +316,107 @@ export const FolderNode = React.memo(function FolderNode({ - + + {isWatched && onRescan && ( { e.stopPropagation(); - onCreateSubfolder(folder.id); + onRescan(folder); }} > - - New subfolder + + Re-scan + )} + {isWatched && onStopWatching && ( { e.stopPropagation(); - startRename(); + onStopWatching(folder); }} > - - Rename + + Stop watching - { - e.stopPropagation(); - onMove(folder); - }} - > - - Move to... - - { - e.stopPropagation(); - onDelete(folder); - }} - > - - Delete - - + )} + { + e.stopPropagation(); + onCreateSubfolder(folder.id); + }} + > + + New subfolder + + { + e.stopPropagation(); + startRename(); + }} + > + + Rename + + { + e.stopPropagation(); + onMove(folder); + }} + > + + Move to... + + { + e.stopPropagation(); + onDelete(folder); + }} + > + + Delete + + )}
- {!isRenaming && contextMenuOpen && ( - - onCreateSubfolder(folder.id)}> - - New subfolder + {!isRenaming && contextMenuOpen && ( + + {isWatched && onRescan && ( + onRescan(folder)}> + + Re-scan - startRename()}> - - Rename + )} + {isWatched && onStopWatching && ( + onStopWatching(folder)}> + + Stop watching - onMove(folder)}> - - Move to... - - onDelete(folder)} - > - - Delete - - - )} + )} + onCreateSubfolder(folder.id)}> + + New subfolder + + startRename()}> + + Rename + + onMove(folder)}> + + Move to... + + onDelete(folder)} + > + + Delete + + + )} ); }); diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index 7695923e3..5945edccb 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -40,6 +40,9 @@ interface FolderTreeViewProps { targetFolderId: number | null ) => void; onReorderFolder?: (folderId: number, beforePos: string | null, afterPos: string | null) => void; + watchedFolderIds?: Set; + onRescanFolder?: (folder: FolderDisplay) => void; + onStopWatchingFolder?: (folder: FolderDisplay) => void; } function groupBy(items: T[], keyFn: (item: T) => string | number): Record { @@ -73,6 +76,9 @@ export function FolderTreeView({ searchQuery, onDropIntoFolder, onReorderFolder, + watchedFolderIds, + onRescanFolder, + onStopWatchingFolder, }: FolderTreeViewProps) { const foldersByParent = useMemo(() => groupBy(folders, (f) => f.parentId ?? "root"), [folders]); @@ -204,6 +210,9 @@ export function FolderTreeView({ siblingPositions={siblingPositions} contextMenuOpen={openContextMenuId === `folder-${f.id}`} onContextMenuOpenChange={(open) => setOpenContextMenuId(open ? `folder-${f.id}` : null)} + isWatched={watchedFolderIds?.has(f.id)} + onRescan={onRescanFolder} + onStopWatching={onStopWatchingFolder} /> ); diff --git a/surfsense_web/components/editor-panel/editor-panel.tsx b/surfsense_web/components/editor-panel/editor-panel.tsx index 802a5ffc3..a1195ef33 100644 --- a/surfsense_web/components/editor-panel/editor-panel.tsx +++ b/surfsense_web/components/editor-panel/editor-panel.tsx @@ -6,6 +6,7 @@ import dynamic from "next/dynamic"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom"; +import { VersionHistoryButton } from "@/components/documents/version-history"; import { MarkdownViewer } from "@/components/markdown-viewer"; import { Button } from "@/components/ui/button"; import { Drawer, DrawerContent, DrawerHandle, DrawerTitle } from "@/components/ui/drawer"; @@ -180,12 +181,16 @@ export function EditorPanelContent({ return ( <>
-
-

{displayTitle}

- {isEditableType && editedMarkdown !== null && ( -

Unsaved changes

- )} -
+
+

{displayTitle}

+ {isEditableType && editedMarkdown !== null && ( +

Unsaved changes

+ )} +
+
+ {editorDoc?.document_type && ( + + )} {onClose && ( )}
+
{isLoading ? ( diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index d880524bd..202d170d9 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -40,6 +40,7 @@ import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import type { DocumentTypeEnum } from "@/contracts/types/document.types"; import { useDebouncedValue } from "@/hooks/use-debounced-value"; import { useMediaQuery } from "@/hooks/use-media-query"; +import { documentsApiService } from "@/lib/apis/documents-api.service"; import { foldersApiService } from "@/lib/apis/folders-api.service"; import { authenticatedFetch } from "@/lib/auth-utils"; import { queries } from "@/zero/queries/index"; @@ -92,6 +93,24 @@ export function DocumentsSidebar({ const [search, setSearch] = useState(""); const debouncedSearch = useDebouncedValue(search, 250); const [activeTypes, setActiveTypes] = useState([]); + const [watchedFolderIds, setWatchedFolderIds] = useState>(new Set()); + + useEffect(() => { + const api = typeof window !== "undefined" ? window.electronAPI : null; + if (!api?.getWatchedFolders) return; + + async function loadWatchedIds() { + const folders = await api!.getWatchedFolders(); + const ids = new Set( + folders + .filter((f) => f.rootFolderId != null) + .map((f) => f.rootFolderId as number) + ); + setWatchedFolderIds(ids); + } + + loadWatchedIds(); + }, []); const { mutateAsync: deleteDocumentMutation } = useAtomValue(deleteDocumentMutationAtom); const [sidebarDocs, setSidebarDocs] = useAtom(sidebarSelectedDocumentsAtom); @@ -223,6 +242,87 @@ export function DocumentsSidebar({ [createFolderParentId, searchSpaceId, setExpandedFolderMap] ); + const isElectron = typeof window !== "undefined" && !!window.electronAPI; + + const handleWatchFolder = useCallback(async () => { + const api = window.electronAPI; + if (!api) return; + + const folderPath = await api.selectFolder(); + if (!folderPath) return; + + const folderName = folderPath.split("/").pop() || folderPath.split("\\").pop() || folderPath; + + try { + const result = await documentsApiService.folderIndex(searchSpaceId, { + folder_path: folderPath, + folder_name: folderName, + search_space_id: searchSpaceId, + }); + + const rootFolderId = (result as { root_folder_id?: number })?.root_folder_id ?? null; + + await api.addWatchedFolder({ + path: folderPath, + name: folderName, + excludePatterns: [".git", "node_modules", "__pycache__", ".DS_Store", ".obsidian", ".trash"], + fileExtensions: null, + rootFolderId, + searchSpaceId, + active: true, + }); + + toast.success(`Watching folder: ${folderName}`); + } catch (err) { + toast.error((err as Error)?.message || "Failed to watch folder"); + } + }, [searchSpaceId]); + + const handleRescanFolder = useCallback( + async (folder: FolderDisplay) => { + const api = window.electronAPI; + if (!api) return; + + const watchedFolders = await api.getWatchedFolders(); + const matched = watchedFolders.find((wf) => wf.rootFolderId === folder.id); + if (!matched) { + toast.error("This folder is not being watched"); + return; + } + + try { + await documentsApiService.folderIndex(searchSpaceId, { + folder_path: matched.path, + folder_name: matched.name, + search_space_id: searchSpaceId, + root_folder_id: folder.id, + }); + toast.success(`Re-scanning folder: ${matched.name}`); + } catch (err) { + toast.error((err as Error)?.message || "Failed to re-scan folder"); + } + }, + [searchSpaceId] + ); + + const handleStopWatching = useCallback( + async (folder: FolderDisplay) => { + const api = window.electronAPI; + if (!api) return; + + const watchedFolders = await api.getWatchedFolders(); + const matched = watchedFolders.find((wf) => wf.rootFolderId === folder.id); + if (!matched) { + toast.error("This folder is not being watched"); + return; + } + + await api.removeWatchedFolder(matched.path); + toast.success(`Stopped watching: ${matched.name}`); + }, + [] + ); + const handleRenameFolder = useCallback(async (folder: FolderDisplay, newName: string) => { try { await foldersApiService.updateFolder(folder.id, { name: newName }); @@ -641,14 +741,15 @@ export function DocumentsSidebar({
- handleCreateFolder(null)} - /> + handleCreateFolder(null)} + onWatchFolder={isElectron ? handleWatchFolder : undefined} + />
{deletableSelectedIds.length > 0 && ( @@ -666,39 +767,42 @@ export function DocumentsSidebar({ )} { - openEditorPanel({ - documentId: doc.id, - searchSpaceId, - title: doc.title, - }); - }} - onEditDocument={(doc) => { - openEditorPanel({ - documentId: doc.id, - searchSpaceId, - title: doc.title, - }); - }} - onDeleteDocument={(doc) => handleDeleteDocument(doc.id)} - onMoveDocument={handleMoveDocument} - onExportDocument={handleExportDocument} - activeTypes={activeTypes} - onDropIntoFolder={handleDropIntoFolder} - onReorderFolder={handleReorderFolder} - /> + folders={treeFolders} + documents={searchFilteredDocuments} + expandedIds={expandedIds} + onToggleExpand={toggleFolderExpand} + mentionedDocIds={mentionedDocIds} + onToggleChatMention={handleToggleChatMention} + onToggleFolderSelect={handleToggleFolderSelect} + onRenameFolder={handleRenameFolder} + onDeleteFolder={handleDeleteFolder} + onMoveFolder={handleMoveFolder} + onCreateFolder={handleCreateFolder} + searchQuery={debouncedSearch.trim() || undefined} + onPreviewDocument={(doc) => { + openEditorPanel({ + documentId: doc.id, + searchSpaceId, + title: doc.title, + }); + }} + onEditDocument={(doc) => { + openEditorPanel({ + documentId: doc.id, + searchSpaceId, + title: doc.title, + }); + }} + onDeleteDocument={(doc) => handleDeleteDocument(doc.id)} + onMoveDocument={handleMoveDocument} + onExportDocument={handleExportDocument} + activeTypes={activeTypes} + onDropIntoFolder={handleDropIntoFolder} + onReorderFolder={handleReorderFolder} + watchedFolderIds={watchedFolderIds} + onRescanFolder={handleRescanFolder} + onStopWatchingFolder={handleStopWatching} + />
Date: Thu, 2 Apr 2026 22:21:16 +0530 Subject: [PATCH 26/61] refactor: completely remove Local Folder connector references and update folder sync logic --- surfsense_web/contracts/enums/connector.ts | 1 - .../contracts/enums/connectorIcons.tsx | 3 - .../contracts/types/connector.types.ts | 1 - surfsense_web/hooks/use-folder-sync.ts | 62 ++++++++++++++----- .../lib/apis/connectors-api.service.ts | 11 ---- .../lib/apis/documents-api.service.ts | 8 +++ surfsense_web/lib/connectors/utils.ts | 1 - surfsense_web/types/window.d.ts | 8 ++- 8 files changed, 60 insertions(+), 35 deletions(-) diff --git a/surfsense_web/contracts/enums/connector.ts b/surfsense_web/contracts/enums/connector.ts index ecf96d88e..501f5d9a3 100644 --- a/surfsense_web/contracts/enums/connector.ts +++ b/surfsense_web/contracts/enums/connector.ts @@ -25,7 +25,6 @@ export enum EnumConnectorName { YOUTUBE_CONNECTOR = "YOUTUBE_CONNECTOR", CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR", OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR", - LOCAL_FOLDER_CONNECTOR = "LOCAL_FOLDER_CONNECTOR", DROPBOX_CONNECTOR = "DROPBOX_CONNECTOR", MCP_CONNECTOR = "MCP_CONNECTOR", COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR", diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx index f7378b74b..2e609b060 100644 --- a/surfsense_web/contracts/enums/connectorIcons.tsx +++ b/surfsense_web/contracts/enums/connectorIcons.tsx @@ -3,7 +3,6 @@ import { BookOpen, File, FileText, - FolderSync, Globe, Microscope, Search, @@ -76,8 +75,6 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas return Circleback; case EnumConnectorName.MCP_CONNECTOR: return MCP; - case EnumConnectorName.LOCAL_FOLDER_CONNECTOR: - return ; case EnumConnectorName.OBSIDIAN_CONNECTOR: return Obsidian; case EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: diff --git a/surfsense_web/contracts/types/connector.types.ts b/surfsense_web/contracts/types/connector.types.ts index 269941375..b83e05dcc 100644 --- a/surfsense_web/contracts/types/connector.types.ts +++ b/surfsense_web/contracts/types/connector.types.ts @@ -30,7 +30,6 @@ export const searchSourceConnectorTypeEnum = z.enum([ "DROPBOX_CONNECTOR", "MCP_CONNECTOR", "OBSIDIAN_CONNECTOR", - "LOCAL_FOLDER_CONNECTOR", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR", "COMPOSIO_GMAIL_CONNECTOR", "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR", diff --git a/surfsense_web/hooks/use-folder-sync.ts b/surfsense_web/hooks/use-folder-sync.ts index a35faf98f..fcfb2814e 100644 --- a/surfsense_web/hooks/use-folder-sync.ts +++ b/surfsense_web/hooks/use-folder-sync.ts @@ -1,41 +1,73 @@ "use client"; import { useEffect, useRef } from "react"; -import { connectorsApiService } from "@/lib/apis/connectors-api.service"; +import { documentsApiService } from "@/lib/apis/documents-api.service"; + +interface FileChangedEvent { + rootFolderId: number | null; + searchSpaceId: number; + folderPath: string; + folderName: string; + relativePath: string; + fullPath: string; + action: string; + timestamp: number; +} const DEBOUNCE_MS = 2000; export function useFolderSync() { - const pendingRef = useRef>>(new Map()); + const queueRef = useRef([]); + const processingRef = useRef(false); + const debounceTimers = useRef>>(new Map()); + + async function processQueue() { + if (processingRef.current) return; + processingRef.current = true; + while (queueRef.current.length > 0) { + const event = queueRef.current.shift()!; + try { + await documentsApiService.folderIndexFile(event.searchSpaceId, { + folder_path: event.folderPath, + folder_name: event.folderName, + search_space_id: event.searchSpaceId, + target_file_path: event.fullPath, + }); + } catch (err) { + console.error("[FolderSync] Failed to trigger re-index:", err); + } + } + processingRef.current = false; + } useEffect(() => { const api = typeof window !== "undefined" ? window.electronAPI : null; if (!api?.onFileChanged) return; - const cleanup = api.onFileChanged((event) => { - const key = `${event.connectorId}:${event.fullPath}`; + // Signal to main process that the renderer is ready to receive events + api.signalRendererReady?.(); - const existing = pendingRef.current.get(key); + const cleanup = api.onFileChanged((event: FileChangedEvent) => { + const key = `${event.folderPath}:${event.fullPath}`; + + const existing = debounceTimers.current.get(key); if (existing) clearTimeout(existing); - const timeout = setTimeout(async () => { - pendingRef.current.delete(key); - try { - await connectorsApiService.indexFile(event.connectorId, event.fullPath); - } catch (err) { - console.error("[FolderSync] Failed to trigger re-index:", err); - } + const timeout = setTimeout(() => { + debounceTimers.current.delete(key); + queueRef.current.push(event); + processQueue(); }, DEBOUNCE_MS); - pendingRef.current.set(key, timeout); + debounceTimers.current.set(key, timeout); }); return () => { cleanup(); - for (const timeout of pendingRef.current.values()) { + for (const timeout of debounceTimers.current.values()) { clearTimeout(timeout); } - pendingRef.current.clear(); + debounceTimers.current.clear(); }; }, []); } diff --git a/surfsense_web/lib/apis/connectors-api.service.ts b/surfsense_web/lib/apis/connectors-api.service.ts index f2722df70..7b94b3746 100644 --- a/surfsense_web/lib/apis/connectors-api.service.ts +++ b/surfsense_web/lib/apis/connectors-api.service.ts @@ -405,17 +405,6 @@ class ConnectorsApiService { ); }; - // ============================================================================= - // Local Folder Connector Methods - // ============================================================================= - - indexFile = async (connectorId: number, filePath: string) => { - return baseApiService.post( - `/api/v1/search-source-connectors/${connectorId}/index-file`, - undefined, - { body: { file_path: filePath } } - ); - }; } export type { SlackChannel, DiscordChannel }; diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index d4a80f8a0..c77cd6848 100644 --- a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -395,6 +395,14 @@ class DocumentsApiService { ); }; + folderIndex = async (searchSpaceId: number, body: { folder_path: string; folder_name: string; search_space_id: number; exclude_patterns?: string[]; file_extensions?: string[]; root_folder_id?: number; enable_summary?: boolean }) => { + return baseApiService.post(`/api/v1/documents/folder-index`, undefined, { body }); + }; + + folderIndexFile = async (searchSpaceId: number, body: { folder_path: string; folder_name: string; search_space_id: number; target_file_path: string; enable_summary?: boolean }) => { + return baseApiService.post(`/api/v1/documents/folder-index-file`, undefined, { body }); + }; + /** * Delete a document */ diff --git a/surfsense_web/lib/connectors/utils.ts b/surfsense_web/lib/connectors/utils.ts index 6ce78be67..90f7f5d21 100644 --- a/surfsense_web/lib/connectors/utils.ts +++ b/surfsense_web/lib/connectors/utils.ts @@ -30,7 +30,6 @@ export const getConnectorTypeDisplay = (type: string): string => { YOUTUBE_CONNECTOR: "YouTube", CIRCLEBACK_CONNECTOR: "Circleback", OBSIDIAN_CONNECTOR: "Obsidian", - LOCAL_FOLDER_CONNECTOR: "Local Folder", DROPBOX_CONNECTOR: "Dropbox", MCP_CONNECTOR: "MCP Server", }; diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index 921449b41..b399664d6 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -5,15 +5,16 @@ interface WatchedFolderConfig { name: string; excludePatterns: string[]; fileExtensions: string[] | null; - connectorId: number; + rootFolderId: number | null; searchSpaceId: number; active: boolean; } interface FolderSyncFileChangedEvent { - connectorId: number; + rootFolderId: number | null; searchSpaceId: number; folderPath: string; + folderName: string; relativePath: string; fullPath: string; action: "add" | "change" | "unlink"; @@ -21,7 +22,7 @@ interface FolderSyncFileChangedEvent { } interface FolderSyncWatcherReadyEvent { - connectorId: number; + rootFolderId: number | null; folderPath: string; } @@ -49,6 +50,7 @@ interface ElectronAPI { onWatcherReady: (callback: (data: FolderSyncWatcherReadyEvent) => void) => () => void; pauseWatcher: () => Promise; resumeWatcher: () => Promise; + signalRendererReady: () => Promise; } declare global { From 22ee5c99cc9a656a3c5f0afae9c100874144e1b6 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 22:21:31 +0530 Subject: [PATCH 27/61] refactor: remove Local Folder connector and related tasks, implement new folder indexing endpoints --- surfsense_backend/app/db.py | 1 - .../app/routes/documents_routes.py | 143 ++++++++++++++++ .../routes/search_source_connectors_routes.py | 144 ---------------- .../app/tasks/celery_tasks/connector_tasks.py | 46 ------ .../app/tasks/celery_tasks/document_tasks.py | 66 ++++++++ .../app/tasks/connector_indexers/__init__.py | 2 - .../local_folder_indexer.py | 155 ++++++------------ .../tests/integration/conftest.py | 19 --- .../test_local_folder_pipeline.py | 126 +++++++------- 9 files changed, 326 insertions(+), 376 deletions(-) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 25045e84a..1a4d3ea06 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -110,7 +110,6 @@ class SearchSourceConnectorType(StrEnum): COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR" COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" - LOCAL_FOLDER_CONNECTOR = "LOCAL_FOLDER_CONNECTOR" class PodcastStatus(StrEnum): diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 2d999eae3..d7974f9ff 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -2,6 +2,7 @@ import asyncio from fastapi import APIRouter, Depends, Form, HTTPException, UploadFile +from pydantic import BaseModel as PydanticBaseModel from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm import selectinload @@ -11,6 +12,7 @@ from app.db import ( Document, DocumentType, DocumentVersion, + Folder, Permission, SearchSpace, SearchSpaceMembership, @@ -1258,3 +1260,144 @@ async def restore_document_version( "document_id": document_id, "restored_version": version_number, } + + +# ===== Local folder indexing endpoints ===== + + +class FolderIndexRequest(PydanticBaseModel): + folder_path: str + folder_name: str + search_space_id: int + exclude_patterns: list[str] | None = None + file_extensions: list[str] | None = None + root_folder_id: int | None = None + enable_summary: bool = False + + +class FolderIndexFileRequest(PydanticBaseModel): + folder_path: str + folder_name: str + search_space_id: int + target_file_path: str + enable_summary: bool = False + + +@router.post("/documents/folder-index") +async def folder_index( + request: FolderIndexRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Full-scan index of a local folder. Creates the root Folder row synchronously + and dispatches the heavy indexing work to a Celery task. + Returns the root_folder_id so the desktop can persist it. + """ + from app.config import config as app_config + + if not app_config.is_self_hosted(): + raise HTTPException( + status_code=400, + detail="Local folder indexing is only available in self-hosted mode", + ) + + await check_permission( + session, + user, + request.search_space_id, + Permission.DOCUMENTS_CREATE.value, + "You don't have permission to create documents in this search space", + ) + + root_folder_id = request.root_folder_id + if root_folder_id: + existing = ( + await session.execute( + select(Folder).where(Folder.id == root_folder_id) + ) + ).scalar_one_or_none() + if not existing: + root_folder_id = None + + if not root_folder_id: + root_folder = Folder( + name=request.folder_name, + search_space_id=request.search_space_id, + created_by_id=str(user.id), + position="a0", + ) + session.add(root_folder) + await session.flush() + root_folder_id = root_folder.id + await session.commit() + + from app.tasks.celery_tasks.document_tasks import index_local_folder_task + + index_local_folder_task.delay( + search_space_id=request.search_space_id, + user_id=str(user.id), + folder_path=request.folder_path, + folder_name=request.folder_name, + exclude_patterns=request.exclude_patterns, + file_extensions=request.file_extensions, + root_folder_id=root_folder_id, + enable_summary=request.enable_summary, + ) + + return { + "message": "Folder indexing started", + "status": "processing", + "root_folder_id": root_folder_id, + } + + +@router.post("/documents/folder-index-file") +async def folder_index_file( + request: FolderIndexFileRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Index a single file within a watched folder (chokidar trigger). + Validates that target_file_path is under folder_path. + """ + from app.config import config as app_config + + if not app_config.is_self_hosted(): + raise HTTPException( + status_code=400, + detail="Local folder indexing is only available in self-hosted mode", + ) + + await check_permission( + session, + user, + request.search_space_id, + Permission.DOCUMENTS_CREATE.value, + "You don't have permission to create documents in this search space", + ) + + from pathlib import Path + + try: + Path(request.target_file_path).relative_to(request.folder_path) + except ValueError: + raise HTTPException( + status_code=400, + detail="target_file_path must be inside folder_path", + ) + + from app.tasks.celery_tasks.document_tasks import index_local_folder_task + + index_local_folder_task.delay( + search_space_id=request.search_space_id, + user_id=str(user.id), + folder_path=request.folder_path, + folder_name=request.folder_name, + target_file_path=request.target_file_path, + enable_summary=request.enable_summary, + ) + + return { + "message": "File indexing started", + "status": "processing", + } diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 5ea88c418..f49ba2d5d 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -1170,24 +1170,6 @@ async def index_connector_content( ) response_message = "Obsidian vault indexing started in the background." - elif connector.connector_type == SearchSourceConnectorType.LOCAL_FOLDER_CONNECTOR: - from app.config import config as app_config - from app.tasks.celery_tasks.connector_tasks import index_local_folder_task - - if not app_config.is_self_hosted(): - raise HTTPException( - status_code=400, - detail="Local folder connector is only available in self-hosted mode", - ) - - logger.info( - f"Triggering local folder indexing for connector {connector_id} into search space {search_space_id}" - ) - index_local_folder_task.delay( - connector_id, search_space_id, str(user.id), indexing_from, indexing_to - ) - response_message = "Local folder indexing started in the background." - elif ( connector.connector_type == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR @@ -1320,76 +1302,6 @@ async def index_connector_content( ) from e -class IndexFileRequest(BaseModel): - file_path: str = Field(..., description="Absolute path to the file to index") - - -@router.post( - "/search-source-connectors/{connector_id}/index-file", - response_model=dict[str, Any], -) -async def index_single_file( - connector_id: int, - body: IndexFileRequest, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """Index a single file from a local folder connector (chokidar real-time trigger).""" - from app.config import config as app_config - from app.tasks.celery_tasks.connector_tasks import index_local_folder_task - - if not app_config.is_self_hosted(): - raise HTTPException( - status_code=400, - detail="Local folder connector is only available in self-hosted mode", - ) - - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id, - SearchSourceConnector.connector_type - == SearchSourceConnectorType.LOCAL_FOLDER_CONNECTOR, - ) - ) - connector = result.scalars().first() - if not connector: - raise HTTPException(status_code=404, detail="Local folder connector not found") - - await check_permission(session, user, connector.search_space_id, Permission.CONNECTORS_UPDATE.value) - - folder_path = connector.config.get("folder_path", "") - - # Security: resolve symlinks and verify the file is inside folder_path - try: - resolved_file = os.path.realpath(body.file_path) - resolved_folder = os.path.realpath(folder_path) - if not resolved_file.startswith(resolved_folder + os.sep) and resolved_file != resolved_folder: - raise HTTPException( - status_code=403, - detail="File path is outside the configured folder", - ) - except (OSError, ValueError): - raise HTTPException( - status_code=403, - detail="Invalid file path", - ) - - index_local_folder_task.delay( - connector_id, - connector.search_space_id, - str(user.id), - None, - None, - target_file_path=resolved_file, - ) - - return { - "message": "Single file indexing started", - "connector_id": connector_id, - "file_path": body.file_path, - } - - async def _update_connector_timestamp_by_id(session: AsyncSession, connector_id: int): """ Update the last_indexed_at timestamp for a connector by its ID. @@ -3166,62 +3078,6 @@ async def run_obsidian_indexing( ) -async def run_local_folder_indexing_with_new_session( - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str, - end_date: str, - target_file_path: str | None = None, -): - """Wrapper to run local folder indexing with its own database session.""" - logger.info( - f"Background task started: Indexing local folder connector {connector_id} into space {search_space_id}" - ) - async with async_session_maker() as session: - await run_local_folder_indexing( - session, connector_id, search_space_id, user_id, start_date, end_date, - target_file_path=target_file_path, - ) - logger.info(f"Background task finished: Indexing local folder connector {connector_id}") - - -async def run_local_folder_indexing( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str, - end_date: str, - target_file_path: str | None = None, -): - """Background task to run local folder indexing.""" - from app.tasks.connector_indexers import index_local_folder - - await _run_indexing_with_notifications( - session=session, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - start_date=start_date, - end_date=end_date, - indexing_function=lambda session, connector_id, search_space_id, user_id, - start_date, end_date, update_last_indexed, on_heartbeat_callback: index_local_folder( - session=session, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - start_date=start_date, - end_date=end_date, - update_last_indexed=update_last_indexed, - on_heartbeat_callback=on_heartbeat_callback, - target_file_path=target_file_path, - ), - update_timestamp_func=_update_connector_timestamp_by_id, - supports_heartbeat_callback=True, - ) - - async def run_composio_indexing_with_new_session( connector_id: int, search_space_id: int, diff --git a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py index 9ff578ad2..57475c9fd 100644 --- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py @@ -926,52 +926,6 @@ async def _index_obsidian_vault( ) -@celery_app.task(name="index_local_folder", bind=True) -def index_local_folder_task( - self, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str = None, - end_date: str = None, - target_file_path: str = None, -): - """Celery task to index a local folder.""" - import asyncio - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - loop.run_until_complete( - _index_local_folder( - connector_id, search_space_id, user_id, start_date, end_date, target_file_path - ) - ) - finally: - loop.close() - - -async def _index_local_folder( - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str = None, - end_date: str = None, - target_file_path: str = None, -): - """Index local folder with new session.""" - from app.routes.search_source_connectors_routes import ( - run_local_folder_indexing, - ) - - async with get_celery_session_maker()() as session: - await run_local_folder_indexing( - session, connector_id, search_space_id, user_id, start_date, end_date, - target_file_path=target_file_path, - ) - - @celery_app.task(name="index_composio_connector", bind=True) def index_composio_connector_task( self, diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py index 662b41f2a..110f3deee 100644 --- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py @@ -10,6 +10,7 @@ from app.config import config from app.services.notification_service import NotificationService from app.services.task_logging_service import TaskLoggingService from app.tasks.celery_tasks import get_celery_session_maker +from app.tasks.connector_indexers.local_folder_indexer import index_local_folder from app.tasks.document_processors import ( add_extension_received_document, add_youtube_video_document, @@ -1243,3 +1244,68 @@ async def _process_circleback_meeting( heartbeat_task.cancel() if notification: _stop_heartbeat(notification.id) + + +# ===== Local folder indexing task ===== + + +@celery_app.task(name="index_local_folder", bind=True) +def index_local_folder_task( + self, + search_space_id: int, + user_id: str, + folder_path: str, + folder_name: str, + exclude_patterns: list[str] | None = None, + file_extensions: list[str] | None = None, + root_folder_id: int | None = None, + enable_summary: bool = False, + target_file_path: str | None = None, +): + """Celery task to index a local folder. Config is passed directly — no connector row.""" + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + try: + loop.run_until_complete( + _index_local_folder_async( + search_space_id=search_space_id, + user_id=user_id, + folder_path=folder_path, + folder_name=folder_name, + exclude_patterns=exclude_patterns, + file_extensions=file_extensions, + root_folder_id=root_folder_id, + enable_summary=enable_summary, + target_file_path=target_file_path, + ) + ) + finally: + loop.close() + + +async def _index_local_folder_async( + search_space_id: int, + user_id: str, + folder_path: str, + folder_name: str, + exclude_patterns: list[str] | None = None, + file_extensions: list[str] | None = None, + root_folder_id: int | None = None, + enable_summary: bool = False, + target_file_path: str | None = None, +): + """Run local folder indexing with a fresh DB session.""" + async with get_celery_session_maker()() as session: + await index_local_folder( + session=session, + search_space_id=search_space_id, + user_id=user_id, + folder_path=folder_path, + folder_name=folder_name, + exclude_patterns=exclude_patterns, + file_extensions=file_extensions, + root_folder_id=root_folder_id, + enable_summary=enable_summary, + target_file_path=target_file_path, + ) diff --git a/surfsense_backend/app/tasks/connector_indexers/__init__.py b/surfsense_backend/app/tasks/connector_indexers/__init__.py index 8e4ad69e5..1b032d54a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/__init__.py +++ b/surfsense_backend/app/tasks/connector_indexers/__init__.py @@ -44,7 +44,6 @@ from .jira_indexer import index_jira_issues from .linear_indexer import index_linear_issues # Documentation and knowledge management -from .local_folder_indexer import index_local_folder from .luma_indexer import index_luma_events from .notion_indexer import index_notion_pages from .obsidian_indexer import index_obsidian_vault @@ -75,5 +74,4 @@ __all__ = [ # noqa: RUF022 # Communication platforms "index_slack_messages", "index_google_gmail_messages", - "index_local_folder", ] diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index fc7fdaf66..591914625 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -1,5 +1,5 @@ """ -Local folder connector indexer. +Local folder indexer. Indexes files from a local folder on disk. Supports: - Full-scan mode (startup reconciliation / manual trigger) @@ -8,7 +8,9 @@ Indexes files from a local folder on disk. Supports: - Document versioning via create_version_snapshot - ETL-based file parsing for binary formats (PDF, DOCX, images, audio, etc.) -Electron-only: all change detection is driven by chokidar in the desktop app. +Desktop-only: all change detection is driven by chokidar in the desktop app. +Config (folder_path, exclude_patterns, etc.) is passed in from the caller — +no connector row is read. """ import os @@ -17,10 +19,9 @@ from collections.abc import Awaitable, Callable from datetime import UTC, datetime from pathlib import Path -from sqlalchemy import delete, select +from sqlalchemy import select from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm.attributes import flag_modified from app.config import config from app.db import ( @@ -28,7 +29,6 @@ from app.db import ( DocumentStatus, DocumentType, Folder, - SearchSourceConnectorType, ) from app.services.llm_service import get_user_long_context_llm from app.services.task_logging_service import TaskLoggingService @@ -45,11 +45,9 @@ from .base import ( build_document_metadata_string, check_document_by_unique_identifier, check_duplicate_document_by_hash, - get_connector_by_id, get_current_timestamp, logger, safe_set_chunks, - update_connector_last_indexed, ) PLAINTEXT_EXTENSIONS = frozenset({ @@ -131,12 +129,10 @@ def scan_folder( for dirpath, dirnames, filenames in os.walk(root): rel_dir = Path(dirpath).relative_to(root) - # Prune excluded directories in-place so os.walk skips them dirnames[:] = [ d for d in dirnames if d not in exclude_patterns ] - # Check if the current directory itself is excluded if any(part in exclude_patterns for part in rel_dir.parts): continue @@ -232,20 +228,18 @@ async def _mirror_folder_structure( folder_name: str, search_space_id: int, user_id: str, - connector_config: dict, - connector, + root_folder_id: int | None = None, exclude_patterns: list[str] | None = None, -) -> dict[str, int]: +) -> tuple[dict[str, int], int]: """Mirror the local filesystem directory structure into DB Folder rows. - Returns a mapping of relative_dir_path -> folder_id. - The empty string key ("") maps to the root folder. + Returns (mapping, root_folder_id) where mapping is + relative_dir_path -> folder_id. The empty string key maps to the root folder. """ root = Path(folder_path) if exclude_patterns is None: exclude_patterns = [] - # Collect all subdirectory paths relative to root subdirs: list[str] = [] for dirpath, dirnames, _ in os.walk(root): dirnames[:] = [d for d in dirnames if d not in exclude_patterns] @@ -256,13 +250,10 @@ async def _mirror_folder_structure( if rel_str: subdirs.append(rel_str) - # Sort by depth so parents are created before children subdirs.sort(key=lambda p: p.count(os.sep)) mapping: dict[str, int] = {} - # Get or create root folder - root_folder_id = connector_config.get("root_folder_id") if root_folder_id: existing = ( await session.execute( @@ -284,12 +275,8 @@ async def _mirror_folder_structure( session.add(root_folder) await session.flush() mapping[""] = root_folder.id - # Persist root_folder_id into connector config - connector_config["root_folder_id"] = root_folder.id - connector.config = {**connector.config, "root_folder_id": root_folder.id} - flag_modified(connector, "config") + root_folder_id = root_folder.id - # Create/reuse subdirectory Folder rows for rel_dir in subdirs: dir_parts = Path(rel_dir).parts dir_name = dir_parts[-1] @@ -322,7 +309,7 @@ async def _mirror_folder_structure( mapping[rel_dir] = new_folder.id await session.flush() - return mapping + return mapping, root_folder_id async def _cleanup_empty_folders( @@ -332,16 +319,11 @@ async def _cleanup_empty_folders( existing_dirs_on_disk: set[str], folder_mapping: dict[str, int], ) -> None: - """Delete Folder rows that are empty (no docs, no children) and no longer on disk. + """Delete Folder rows that are empty (no docs, no children) and no longer on disk.""" + from sqlalchemy import delete as sa_delete - Queries ALL folders under this search space (not just the current mapping) - so that stale folders from previous syncs are also cleaned up. - """ - # Build a reverse mapping from folder_id → rel_dir for known dirs id_to_rel: dict[int, str] = {fid: rel for rel, fid in folder_mapping.items() if rel} - # Also find any folders in the DB that are children of the root but NOT - # in the current mapping (stale from a previous sync). all_folders = ( await session.execute( select(Folder).where( @@ -351,7 +333,6 @@ async def _cleanup_empty_folders( ) ).scalars().all() - # Build candidates: folders not on disk that we might delete candidates: list[Folder] = [] for folder in all_folders: rel = id_to_rel.get(folder.id) @@ -359,8 +340,6 @@ async def _cleanup_empty_folders( continue candidates.append(folder) - # Sort deepest first (by name depth heuristic — folders with no children first) - # Repeat until no more deletions happen (cascading empty parents) changed = True while changed: changed = False @@ -384,57 +363,46 @@ async def _cleanup_empty_folders( remaining.append(folder) continue - await session.execute(delete(Folder).where(Folder.id == folder.id)) + await session.execute(sa_delete(Folder).where(Folder.id == folder.id)) changed = True candidates = remaining async def index_local_folder( session: AsyncSession, - connector_id: int, search_space_id: int, user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, - on_heartbeat_callback: HeartbeatCallbackType | None = None, + folder_path: str, + folder_name: str, + exclude_patterns: list[str] | None = None, + file_extensions: list[str] | None = None, + root_folder_id: int | None = None, + enable_summary: bool = False, target_file_path: str | None = None, -) -> tuple[int, int, str | None]: + on_heartbeat_callback: HeartbeatCallbackType | None = None, +) -> tuple[int, int, int | None, str | None]: """Index files from a local folder. Supports two modes: - Full scan (target_file_path=None): walks entire folder, handles new/changed/deleted files. - Single-file (target_file_path set): processes only that file. - Returns (indexed_count, skipped_count, error_or_warning_message). + Returns (indexed_count, skipped_count, root_folder_id, error_or_warning_message). """ task_logger = TaskLoggingService(session, search_space_id) log_entry = await task_logger.log_task_start( task_name="local_folder_indexing", - source="connector_indexing_task", - message=f"Starting local folder indexing for connector {connector_id}", + source="local_folder_indexing_task", + message=f"Starting local folder indexing for {folder_name}", metadata={ - "connector_id": connector_id, + "folder_path": folder_path, "user_id": str(user_id), "target_file_path": target_file_path, }, ) try: - connector = await get_connector_by_id( - session, connector_id, SearchSourceConnectorType.LOCAL_FOLDER_CONNECTOR - ) - if not connector: - await task_logger.log_task_failure( - log_entry, - f"Connector {connector_id} not found", - "Connector not found", - {}, - ) - return 0, 0, f"Connector {connector_id} not found" - - folder_path = connector.config.get("folder_path") if not folder_path or not os.path.exists(folder_path): await task_logger.log_task_failure( log_entry, @@ -442,59 +410,54 @@ async def index_local_folder( "Folder not found", {}, ) - return 0, 0, f"Folder path missing or does not exist: {folder_path}" + return 0, 0, root_folder_id, f"Folder path missing or does not exist: {folder_path}" - folder_name = connector.config.get("folder_name") or os.path.basename(folder_path) - exclude_patterns = connector.config.get("exclude_patterns", DEFAULT_EXCLUDE_PATTERNS) - file_extensions = connector.config.get("file_extensions") # None = all + if exclude_patterns is None: + exclude_patterns = DEFAULT_EXCLUDE_PATTERNS # ==================================================================== # SINGLE-FILE MODE # ==================================================================== if target_file_path: - return await _index_single_file( + indexed, skipped, err = await _index_single_file( session=session, - connector=connector, - connector_id=connector_id, search_space_id=search_space_id, user_id=user_id, folder_path=folder_path, folder_name=folder_name, target_file_path=target_file_path, + enable_summary=enable_summary, task_logger=task_logger, log_entry=log_entry, - update_last_indexed=update_last_indexed, ) + return indexed, skipped, root_folder_id, err # ==================================================================== # FULL-SCAN MODE # ==================================================================== - # Phase 0: Mirror folder structure await task_logger.log_task_progress( log_entry, "Mirroring folder structure", {"stage": "folder_mirror"} ) - folder_mapping = await _mirror_folder_structure( + folder_mapping, root_folder_id = await _mirror_folder_structure( session=session, folder_path=folder_path, folder_name=folder_name, search_space_id=search_space_id, user_id=user_id, - connector_config=connector.config, - connector=connector, + root_folder_id=root_folder_id, exclude_patterns=exclude_patterns, ) await session.flush() - # Scan files on disk try: files = scan_folder(folder_path, file_extensions, exclude_patterns) except Exception as e: await task_logger.log_task_failure( log_entry, f"Failed to scan folder: {e}", "Scan error", {} ) - return 0, 0, f"Failed to scan folder: {e}" + return 0, 0, root_folder_id, f"Failed to scan folder: {e}" logger.info(f"Found {len(files)} files in folder") @@ -530,7 +493,6 @@ async def index_local_folder( ) if existing_document: - # Check mtime first (cheap) stored_mtime = (existing_document.document_metadata or {}).get("mtime") current_mtime = file_info["modified_at"].timestamp() @@ -542,7 +504,6 @@ async def index_local_folder( skipped_count += 1 continue - # mtime differs — read file and check content hash try: content, content_hash = await _compute_file_content_hash( file_path_abs, file_info["relative_path"], search_space_id @@ -553,7 +514,6 @@ async def index_local_folder( continue if existing_document.content_hash == content_hash: - # Content same, just update mtime in metadata meta = dict(existing_document.document_metadata or {}) meta["mtime"] = current_mtime existing_document.document_metadata = meta @@ -564,7 +524,6 @@ async def index_local_folder( skipped_count += 1 continue - # Content actually changed — snapshot version, queue for re-index await create_version_snapshot(session, existing_document) files_to_process.append( @@ -581,7 +540,6 @@ async def index_local_folder( ) continue - # New document — read content try: content, content_hash = await _compute_file_content_hash( file_path_abs, file_info["relative_path"], search_space_id @@ -595,7 +553,6 @@ async def index_local_folder( skipped_count += 1 continue - # Check for duplicate content from another connector with session.no_autoflush: dup = await check_duplicate_document_by_hash(session, content_hash) if dup: @@ -603,7 +560,6 @@ async def index_local_folder( skipped_count += 1 continue - # Determine folder_id for this file parent_dir = str(Path(relative_path).parent) if parent_dir == ".": parent_dir = "" @@ -616,17 +572,16 @@ async def index_local_folder( document_metadata={ "folder_name": folder_name, "file_path": relative_path, - "connector_id": connector_id, "mtime": file_info["modified_at"].timestamp(), }, content="Pending...", - content_hash=unique_identifier_hash, # Temp unique — updated in phase 2 + content_hash=unique_identifier_hash, unique_identifier_hash=unique_identifier_hash, embedding=None, status=DocumentStatus.pending(), updated_at=get_current_timestamp(), created_by_id=user_id, - connector_id=connector_id, + connector_id=None, folder_id=folder_id, ) session.add(document) @@ -655,16 +610,17 @@ async def index_local_folder( # ================================================================ # PHASE 1.5: Delete documents no longer on disk # ================================================================ - all_connector_docs = ( + all_folder_docs = ( await session.execute( select(Document).where( - Document.connector_id == connector_id, Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == search_space_id, + Document.folder_id.in_(list(folder_mapping.values())), ) ) ).scalars().all() - for doc in all_connector_docs: + for doc in all_folder_docs: if doc.unique_identifier_hash not in seen_unique_hashes: await session.delete(doc) @@ -709,7 +665,7 @@ async def index_local_folder( document_string = build_document_metadata_string(metadata_sections) summary_content = "" - if long_context_llm and connector.enable_summary: + if long_context_llm and enable_summary: doc_meta = { "folder_name": folder_name, "file_path": relative_path, @@ -721,7 +677,6 @@ async def index_local_folder( embedding = embed_text(document_string) chunks = await create_document_chunks(document_string) - # Determine folder_id parent_dir = str(Path(relative_path).parent) if parent_dir == ".": parent_dir = "" @@ -735,7 +690,6 @@ async def index_local_folder( document.document_metadata = { "folder_name": folder_name, "file_path": relative_path, - "connector_id": connector_id, "summary": summary_content, "mtime": file_info["modified_at"].timestamp(), } @@ -782,8 +736,6 @@ async def index_local_folder( session, root_fid, search_space_id, existing_dirs, folder_mapping ) - await update_connector_last_indexed(session, connector, update_last_indexed) - try: await session.commit() except Exception as e: @@ -802,7 +754,7 @@ async def index_local_folder( await task_logger.log_task_success( log_entry, - f"Completed local folder indexing for connector {connector_id}", + f"Completed local folder indexing for {folder_name}", { "indexed": indexed_count, "skipped": skipped_count, @@ -811,7 +763,7 @@ async def index_local_folder( }, ) - return indexed_count, skipped_count, warning_message + return indexed_count, skipped_count, root_folder_id, warning_message except SQLAlchemyError as e: logger.exception(f"Database error during local folder indexing: {e}") @@ -819,34 +771,31 @@ async def index_local_folder( await task_logger.log_task_failure( log_entry, f"DB error: {e}", "Database error", {} ) - return 0, 0, f"Database error: {e}" + return 0, 0, root_folder_id, f"Database error: {e}" except Exception as e: logger.exception(f"Error during local folder indexing: {e}") await task_logger.log_task_failure( log_entry, f"Error: {e}", "Unexpected error", {} ) - return 0, 0, str(e) + return 0, 0, root_folder_id, str(e) async def _index_single_file( session: AsyncSession, - connector, - connector_id: int, search_space_id: int, user_id: str, folder_path: str, folder_name: str, target_file_path: str, + enable_summary: bool, task_logger, log_entry, - update_last_indexed: bool = True, ) -> tuple[int, int, str | None]: """Process a single file (chokidar real-time trigger).""" try: full_path = Path(target_file_path) if not full_path.exists(): - # File was deleted — find and remove the document rel = str(full_path.relative_to(folder_path)) unique_id = f"{folder_name}:{rel}" uid_hash = generate_unique_identifier_hash( @@ -880,7 +829,6 @@ async def _index_single_file( if existing: if existing.content_hash == content_hash: - # Update mtime mtime = full_path.stat().st_mtime meta = dict(existing.document_metadata or {}) meta["mtime"] = mtime @@ -888,10 +836,8 @@ async def _index_single_file( await session.commit() return 0, 1, None - # Content changed — snapshot + re-index await create_version_snapshot(session, existing) - # Get LLM long_context_llm = await get_user_long_context_llm( session, user_id, search_space_id ) @@ -906,7 +852,7 @@ async def _index_single_file( document_string = build_document_metadata_string(metadata_sections) summary_content = "" - if long_context_llm and connector.enable_summary: + if long_context_llm and enable_summary: summary_content, _ = await generate_document_summary( document_string, long_context_llm, {"folder_name": folder_name, "file_path": rel_path} ) @@ -917,7 +863,6 @@ async def _index_single_file( doc_metadata = { "folder_name": folder_name, "file_path": rel_path, - "connector_id": connector_id, "summary": summary_content, "mtime": mtime, } @@ -946,16 +891,14 @@ async def _index_single_file( status=DocumentStatus.ready(), updated_at=get_current_timestamp(), created_by_id=user_id, - connector_id=connector_id, + connector_id=None, ) session.add(document) - # Set chunks await session.flush() for chunk in chunks: chunk.document_id = document.id session.add_all(chunks) - await update_connector_last_indexed(session, connector, update_last_indexed) await session.commit() await task_logger.log_task_success( diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py index 840246e2f..9c91011ae 100644 --- a/surfsense_backend/tests/integration/conftest.py +++ b/surfsense_backend/tests/integration/conftest.py @@ -168,22 +168,3 @@ def make_connector_document(db_connector, db_user): return _make -@pytest_asyncio.fixture -async def db_local_folder_connector( - db_session: AsyncSession, db_user: User, db_search_space: SearchSpace, tmp_path -) -> SearchSourceConnector: - connector = SearchSourceConnector( - name="Test Local Folder", - connector_type=SearchSourceConnectorType.LOCAL_FOLDER_CONNECTOR, - config={ - "folder_path": str(tmp_path), - "folder_name": "test-folder", - "exclude_patterns": [], - "file_extensions": None, - }, - search_space_id=db_search_space.id, - user_id=db_user.id, - ) - db_session.add(connector) - await db_session.flush() - return connector diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py index 988905f8f..e46d59a67 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py @@ -14,7 +14,6 @@ from app.db import ( DocumentType, DocumentVersion, Folder, - SearchSourceConnector, SearchSpace, User, ) @@ -72,7 +71,6 @@ class TestFullIndexer: async def test_i1_new_file_indexed( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, @@ -82,11 +80,12 @@ class TestFullIndexer: (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") - count, skipped, err = await index_local_folder( + count, skipped, root_folder_id, err = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", ) assert err is None @@ -95,7 +94,8 @@ class TestFullIndexer: docs = ( await db_session.execute( select(Document).where( - Document.connector_id == db_local_folder_connector.id + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, ) ) ).scalars().all() @@ -112,7 +112,6 @@ class TestFullIndexer: async def test_i2_unchanged_skipped( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, @@ -122,27 +121,31 @@ class TestFullIndexer: (tmp_path / "note.md").write_text("# Hello\n\nSame content.") - count1, _, _ = await index_local_folder( + count1, _, root_folder_id, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", ) assert count1 == 1 - # Second run — unchanged - count2, _, _ = await index_local_folder( + # Second run — unchanged, pass root_folder_id from first run + count2, _, _, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, ) assert count2 == 0 total = ( await db_session.execute( select(func.count()).select_from(Document).where( - Document.connector_id == db_local_folder_connector.id + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, ) ) ).scalar_one() @@ -157,7 +160,6 @@ class TestFullIndexer: async def test_i3_changed_reindexed( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, @@ -168,11 +170,12 @@ class TestFullIndexer: f = tmp_path / "note.md" f.write_text("# Version 1\n\nOriginal.") - await index_local_folder( + _, _, root_folder_id, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", ) # Modify @@ -180,11 +183,13 @@ class TestFullIndexer: # Touch mtime to ensure it's detected as different os.utime(f, (f.stat().st_atime + 10, f.stat().st_mtime + 10)) - count, _, _ = await index_local_folder( + count, _, _, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, ) assert count == 1 @@ -192,7 +197,8 @@ class TestFullIndexer: versions = ( await db_session.execute( select(DocumentVersion).join(Document).where( - Document.connector_id == db_local_folder_connector.id + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, ) ) ).scalars().all() @@ -207,7 +213,6 @@ class TestFullIndexer: async def test_i4_deleted_removed( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, @@ -218,17 +223,19 @@ class TestFullIndexer: f = tmp_path / "to_delete.md" f.write_text("# Delete me") - await index_local_folder( + _, _, root_folder_id, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", ) docs_before = ( await db_session.execute( select(func.count()).select_from(Document).where( - Document.connector_id == db_local_folder_connector.id + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, ) ) ).scalar_one() @@ -238,15 +245,18 @@ class TestFullIndexer: await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, ) docs_after = ( await db_session.execute( select(func.count()).select_from(Document).where( - Document.connector_id == db_local_folder_connector.id + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, ) ) ).scalar_one() @@ -261,7 +271,6 @@ class TestFullIndexer: async def test_i5_single_file_mode( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, @@ -273,11 +282,12 @@ class TestFullIndexer: (tmp_path / "b.md").write_text("File B") (tmp_path / "c.md").write_text("File C") - count, _, _ = await index_local_folder( + count, _, _, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", target_file_path=str(tmp_path / "b.md"), ) assert count == 1 @@ -285,12 +295,13 @@ class TestFullIndexer: docs = ( await db_session.execute( select(Document).where( - Document.connector_id == db_local_folder_connector.id + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, ) ) ).scalars().all() assert len(docs) == 1 - assert docs[0].title == "b" + assert docs[0].title == "b.md" # ==================================================================== @@ -309,30 +320,27 @@ class TestFolderMirroring: async def test_f1_root_folder_created( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, ): - """F1: First sync creates a root Folder and stores root_folder_id.""" + """F1: First sync creates a root Folder and returns root_folder_id.""" from app.tasks.connector_indexers.local_folder_indexer import index_local_folder (tmp_path / "root.md").write_text("Root file") - await index_local_folder( + _, _, root_folder_id, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", ) - # Refresh connector - await db_session.refresh(db_local_folder_connector) - root_id = db_local_folder_connector.config.get("root_folder_id") - assert root_id is not None + assert root_folder_id is not None root_folder = ( - await db_session.execute(select(Folder).where(Folder.id == root_id)) + await db_session.execute(select(Folder).where(Folder.id == root_folder_id)) ).scalar_one() assert root_folder.name == "test-folder" @@ -345,7 +353,6 @@ class TestFolderMirroring: async def test_f2_nested_folder_rows( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, @@ -362,9 +369,10 @@ class TestFolderMirroring: await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", ) folders = ( @@ -394,7 +402,6 @@ class TestFolderMirroring: async def test_f3_resync_reuses_folders( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, @@ -406,11 +413,12 @@ class TestFolderMirroring: sub.mkdir() (sub / "file.md").write_text("content") - await index_local_folder( + _, _, root_folder_id, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", ) folders_before = ( @@ -420,12 +428,14 @@ class TestFolderMirroring: ).scalars().all() ids_before = {f.id for f in folders_before} - # Re-sync + # Re-sync with root_folder_id from first run await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, ) folders_after = ( @@ -446,7 +456,6 @@ class TestFolderMirroring: async def test_f4_folder_id_assigned( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, @@ -459,17 +468,19 @@ class TestFolderMirroring: (daily / "today.md").write_text("today note") (tmp_path / "root.md").write_text("root note") - await index_local_folder( + _, _, root_folder_id, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", ) docs = ( await db_session.execute( select(Document).where( - Document.connector_id == db_local_folder_connector.id + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, ) ) ).scalars().all() @@ -486,9 +497,7 @@ class TestFolderMirroring: assert today_doc.folder_id == daily_folder.id # Root doc should be in the root folder - await db_session.refresh(db_local_folder_connector) - root_fid = db_local_folder_connector.config.get("root_folder_id") - assert root_doc.folder_id == root_fid + assert root_doc.folder_id == root_folder_id @pytest.mark.usefixtures( "patched_self_hosted", @@ -499,7 +508,6 @@ class TestFolderMirroring: async def test_f5_empty_folder_cleanup( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, tmp_path: Path, @@ -515,11 +523,12 @@ class TestFolderMirroring: (daily / "today.md").write_text("today") (weekly / "review.md").write_text("review") - await index_local_folder( + _, _, root_folder_id, _ = await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", ) # Verify weekly folder exists @@ -535,9 +544,11 @@ class TestFolderMirroring: await index_local_folder( session=db_session, - connector_id=db_local_folder_connector.id, search_space_id=db_search_space.id, user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, ) # weekly Folder should be gone (empty, dir removed) @@ -570,7 +581,6 @@ class TestPipelineIntegration: async def test_p1_local_folder_file_through_pipeline( self, db_session: AsyncSession, - db_local_folder_connector: SearchSourceConnector, db_user: User, db_search_space: SearchSpace, mocker, @@ -585,7 +595,7 @@ class TestPipelineIntegration: unique_id="test-folder:test.md", document_type=DocumentType.LOCAL_FOLDER_FILE, search_space_id=db_search_space.id, - connector_id=db_local_folder_connector.id, + connector_id=None, created_by_id=str(db_user.id), ) From caf2525ab5d32ffbb6db0c96a5e4109996a24030 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 22:29:07 +0530 Subject: [PATCH 28/61] fix: update folder ID collection logic to include deleted directories and adjust test cases for document titles --- .../connector_indexers/local_folder_indexer.py | 15 ++++++++++++++- .../test_local_folder_pipeline.py | 4 ++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 591914625..93c6649a2 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -610,12 +610,25 @@ async def index_local_folder( # ================================================================ # PHASE 1.5: Delete documents no longer on disk # ================================================================ + # Collect ALL folder IDs under this root (including folders that no + # longer exist on disk but still have rows in the DB) so we catch + # documents in deleted directories too. + all_root_folder_ids = set(folder_mapping.values()) + all_db_folders = ( + await session.execute( + select(Folder.id).where( + Folder.search_space_id == search_space_id, + ) + ) + ).scalars().all() + all_root_folder_ids.update(all_db_folders) + all_folder_docs = ( await session.execute( select(Document).where( Document.document_type == DocumentType.LOCAL_FOLDER_FILE, Document.search_space_id == search_space_id, - Document.folder_id.in_(list(folder_mapping.values())), + Document.folder_id.in_(list(all_root_folder_ids)), ) ) ).scalars().all() diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py index e46d59a67..34efad789 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py @@ -485,8 +485,8 @@ class TestFolderMirroring: ) ).scalars().all() - today_doc = next(d for d in docs if d.title == "today") - root_doc = next(d for d in docs if d.title == "root") + today_doc = next(d for d in docs if d.title == "today.md") + root_doc = next(d for d in docs if d.title == "root.md") daily_folder = ( await db_session.execute( From c27d24a117633aac32de889b12f153239b58a832 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 22:41:45 +0530 Subject: [PATCH 29/61] feat: enhance folder indexing by adding root folder ID support and implement folder creation and cleanup logic --- .../app/routes/documents_routes.py | 2 + .../local_folder_indexer.py | 102 ++++++++++++++ .../test_local_folder_pipeline.py | 130 ++++++++++++++++++ surfsense_web/hooks/use-folder-sync.ts | 1 + .../lib/apis/documents-api.service.ts | 2 +- 5 files changed, 236 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index d7974f9ff..05221b192 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -1280,6 +1280,7 @@ class FolderIndexFileRequest(PydanticBaseModel): folder_name: str search_space_id: int target_file_path: str + root_folder_id: int | None = None enable_summary: bool = False @@ -1394,6 +1395,7 @@ async def folder_index_file( folder_path=request.folder_path, folder_name=request.folder_name, target_file_path=request.target_file_path, + root_folder_id=request.root_folder_id, enable_summary=request.enable_summary, ) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 93c6649a2..3d4ddc19e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -312,6 +312,92 @@ async def _mirror_folder_structure( return mapping, root_folder_id +async def _resolve_folder_for_file( + session: AsyncSession, + rel_path: str, + root_folder_id: int, + search_space_id: int, + user_id: str, +) -> int: + """Given a file's relative path, ensure all parent Folder rows exist and + return the folder_id for the file's immediate parent directory. + + For a file at "notes/daily/today.md", this ensures Folder rows exist for + "notes" and "notes/daily", and returns the id of "notes/daily". + For a file at "readme.md" (root level), returns root_folder_id. + """ + parent_dir = str(Path(rel_path).parent) + if parent_dir == ".": + return root_folder_id + + parts = Path(parent_dir).parts + current_parent_id = root_folder_id + + for part in parts: + existing = ( + await session.execute( + select(Folder).where( + Folder.name == part, + Folder.parent_id == current_parent_id, + Folder.search_space_id == search_space_id, + ) + ) + ).scalar_one_or_none() + + if existing: + current_parent_id = existing.id + else: + new_folder = Folder( + name=part, + parent_id=current_parent_id, + search_space_id=search_space_id, + created_by_id=user_id, + position="a0", + ) + session.add(new_folder) + await session.flush() + current_parent_id = new_folder.id + + return current_parent_id + + +async def _cleanup_empty_folder_chain( + session: AsyncSession, + folder_id: int, + root_folder_id: int, +) -> None: + """Walk up from folder_id toward root, deleting empty folders (no docs, no + children). Stops at root_folder_id which is never deleted.""" + current_id = folder_id + while current_id and current_id != root_folder_id: + has_doc = ( + await session.execute( + select(Document.id).where(Document.folder_id == current_id).limit(1) + ) + ).scalar_one_or_none() + if has_doc is not None: + break + + has_child = ( + await session.execute( + select(Folder.id).where(Folder.parent_id == current_id).limit(1) + ) + ).scalar_one_or_none() + if has_child is not None: + break + + folder = ( + await session.execute(select(Folder).where(Folder.id == current_id)) + ).scalar_one_or_none() + if not folder: + break + + parent_id = folder.parent_id + await session.delete(folder) + await session.flush() + current_id = parent_id + + async def _cleanup_empty_folders( session: AsyncSession, root_folder_id: int, @@ -427,6 +513,7 @@ async def index_local_folder( folder_name=folder_name, target_file_path=target_file_path, enable_summary=enable_summary, + root_folder_id=root_folder_id, task_logger=task_logger, log_entry=log_entry, ) @@ -802,6 +889,7 @@ async def _index_single_file( folder_name: str, target_file_path: str, enable_summary: bool, + root_folder_id: int | None, task_logger, log_entry, ) -> tuple[int, int, str | None]: @@ -816,7 +904,13 @@ async def _index_single_file( ) existing = await check_document_by_unique_identifier(session, uid_hash) if existing: + deleted_folder_id = existing.folder_id await session.delete(existing) + await session.flush() + if deleted_folder_id and root_folder_id: + await _cleanup_empty_folder_chain( + session, deleted_folder_id, root_folder_id + ) await session.commit() return 0, 0, None return 0, 0, None @@ -880,6 +974,12 @@ async def _index_single_file( "mtime": mtime, } + folder_id = None + if root_folder_id: + folder_id = await _resolve_folder_for_file( + session, rel_path, root_folder_id, search_space_id, user_id + ) + if existing: existing.title = title existing.content = document_string @@ -887,6 +987,7 @@ async def _index_single_file( existing.source_markdown = content existing.embedding = embedding existing.document_metadata = doc_metadata + existing.folder_id = folder_id await safe_set_chunks(session, existing, chunks) existing.updated_at = get_current_timestamp() existing.status = DocumentStatus.ready() @@ -905,6 +1006,7 @@ async def _index_single_file( updated_at=get_current_timestamp(), created_by_id=user_id, connector_id=None, + folder_id=folder_id, ) session.add(document) await session.flush() diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py index 34efad789..110aa6caf 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py @@ -567,6 +567,136 @@ class TestFolderMirroring: ).scalar_one_or_none() assert daily_after is not None + @pytest.mark.usefixtures( + "patched_self_hosted", + "patched_embed_for_indexer", + "patched_chunks_for_indexer", + "patched_summary_for_indexer", + ) + async def test_f6_single_file_creates_subfolder( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """F6: Single-file mode creates missing Folder rows and assigns correct folder_id.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "root.md").write_text("root") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + sub = tmp_path / "notes" / "daily" + sub.mkdir(parents=True) + (sub / "new.md").write_text("new note in subfolder") + + count, _, _, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_path=str(sub / "new.md"), + root_folder_id=root_folder_id, + ) + assert count == 1 + + doc = ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.title == "new.md", + ) + ) + ).scalar_one() + + daily_folder = ( + await db_session.execute( + select(Folder).where(Folder.name == "daily") + ) + ).scalar_one() + + assert doc.folder_id == daily_folder.id + assert daily_folder.parent_id is not None + + notes_folder = ( + await db_session.execute( + select(Folder).where(Folder.name == "notes") + ) + ).scalar_one() + assert daily_folder.parent_id == notes_folder.id + assert notes_folder.parent_id == root_folder_id + + @pytest.mark.usefixtures( + "patched_self_hosted", + "patched_embed_for_indexer", + "patched_chunks_for_indexer", + "patched_summary_for_indexer", + ) + async def test_f7_single_file_delete_cleans_empty_folders( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """F7: Deleting the only file in a subfolder via single-file mode removes empty Folder rows.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + sub = tmp_path / "notes" / "ephemeral" + sub.mkdir(parents=True) + (sub / "temp.md").write_text("temporary") + (tmp_path / "keep.md").write_text("keep this") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + eph_folder = ( + await db_session.execute( + select(Folder).where(Folder.name == "ephemeral") + ) + ).scalar_one_or_none() + assert eph_folder is not None + + target = sub / "temp.md" + target.unlink() + + await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_path=str(target), + root_folder_id=root_folder_id, + ) + + eph_after = ( + await db_session.execute( + select(Folder).where(Folder.name == "ephemeral") + ) + ).scalar_one_or_none() + assert eph_after is None + + notes_after = ( + await db_session.execute( + select(Folder).where(Folder.name == "notes") + ) + ).scalar_one_or_none() + assert notes_after is None + # ==================================================================== # Tier 5: Pipeline Integration (P1) diff --git a/surfsense_web/hooks/use-folder-sync.ts b/surfsense_web/hooks/use-folder-sync.ts index fcfb2814e..f051b7df6 100644 --- a/surfsense_web/hooks/use-folder-sync.ts +++ b/surfsense_web/hooks/use-folder-sync.ts @@ -32,6 +32,7 @@ export function useFolderSync() { folder_name: event.folderName, search_space_id: event.searchSpaceId, target_file_path: event.fullPath, + root_folder_id: event.rootFolderId, }); } catch (err) { console.error("[FolderSync] Failed to trigger re-index:", err); diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index c77cd6848..a8e3831d4 100644 --- a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -399,7 +399,7 @@ class DocumentsApiService { return baseApiService.post(`/api/v1/documents/folder-index`, undefined, { body }); }; - folderIndexFile = async (searchSpaceId: number, body: { folder_path: string; folder_name: string; search_space_id: number; target_file_path: string; enable_summary?: boolean }) => { + folderIndexFile = async (searchSpaceId: number, body: { folder_path: string; folder_name: string; search_space_id: number; target_file_path: string; root_folder_id?: number | null; enable_summary?: boolean }) => { return baseApiService.post(`/api/v1/documents/folder-index-file`, undefined, { body }); }; From 53df393cf7ca300e9eb79f14429bb94857bde492 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 23:28:23 +0530 Subject: [PATCH 30/61] refactor: streamline local folder indexing logic by removing unused imports, enhancing content hashing, and improving document creation process --- .../local_folder_indexer.py | 415 +++++++----------- .../test_local_folder_pipeline.py | 139 +----- 2 files changed, 174 insertions(+), 380 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 3d4ddc19e..a3281eaea 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -14,7 +14,6 @@ no connector row is read. """ import os -import time from collections.abc import Awaitable, Callable from datetime import UTC, datetime from pathlib import Path @@ -30,24 +29,16 @@ from app.db import ( DocumentType, Folder, ) +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.document_hashing import compute_identifier_hash +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService from app.services.llm_service import get_user_long_context_llm from app.services.task_logging_service import TaskLoggingService -from app.utils.document_converters import ( - create_document_chunks, - embed_text, - generate_content_hash, - generate_document_summary, - generate_unique_identifier_hash, -) from app.utils.document_versioning import create_version_snapshot from .base import ( - build_document_metadata_string, check_document_by_unique_identifier, - check_duplicate_document_by_hash, - get_current_timestamp, logger, - safe_set_chunks, ) PLAINTEXT_EXTENSIONS = frozenset({ @@ -89,7 +80,6 @@ def _needs_etl(filename: str) -> bool: return not _is_plaintext_file(filename) and not _is_audio_file(filename) HeartbeatCallbackType = Callable[[int], Awaitable[None]] -HEARTBEAT_INTERVAL_SECONDS = 30 DEFAULT_EXCLUDE_PATTERNS = [ ".git", @@ -210,6 +200,16 @@ async def _read_file_content(file_path: str, filename: str) -> str: return await _parse_file_to_markdown(file_path, filename) +def _content_hash(content: str, search_space_id: int) -> str: + """SHA-256 hash of content scoped to a search space. + + Matches the format used by ``compute_content_hash`` in the unified + pipeline so that dedup checks are consistent. + """ + import hashlib + return hashlib.sha256(f"{search_space_id}:{content}".encode("utf-8")).hexdigest() + + async def _compute_file_content_hash( file_path: str, filename: str, search_space_id: int, ) -> tuple[str, str]: @@ -218,8 +218,7 @@ async def _compute_file_content_hash( Returns (content_text, content_hash). """ content = await _read_file_content(file_path, filename) - content_hash = generate_content_hash(content, search_space_id) - return content, content_hash + return content, _content_hash(content, search_space_id) async def _mirror_folder_structure( @@ -454,6 +453,40 @@ async def _cleanup_empty_folders( candidates = remaining +def _build_connector_doc( + title: str, + content: str, + relative_path: str, + folder_name: str, + *, + search_space_id: int, + user_id: str, + enable_summary: bool, +) -> ConnectorDocument: + """Build a ConnectorDocument from a local file's extracted content.""" + unique_id = f"{folder_name}:{relative_path}" + metadata = { + "folder_name": folder_name, + "file_path": relative_path, + "document_type": "Local Folder File", + "connector_type": "Local Folder", + } + fallback_summary = f"File: {title}\n\n{content[:4000]}" + + return ConnectorDocument( + title=title, + source_markdown=content, + unique_id=unique_id, + document_type=DocumentType.LOCAL_FOLDER_FILE, + search_space_id=search_space_id, + connector_id=None, + created_by_id=user_id, + should_summarize=enable_summary, + fallback_summary=fallback_summary, + metadata=metadata, + ) + + async def index_local_folder( session: AsyncSession, search_space_id: int, @@ -551,15 +584,13 @@ async def index_local_folder( indexed_count = 0 skipped_count = 0 failed_count = 0 - duplicate_count = 0 - - last_heartbeat_time = time.time() # ================================================================ - # PHASE 1: Analyze all files, create pending documents + # PHASE 1: Pre-filter files (mtime / content-hash), version changed # ================================================================ - files_to_process: list[dict] = [] - new_documents_created = False + connector_docs: list[ConnectorDocument] = [] + # Maps unique_id -> (relative_path, mtime) for post-pipeline folder_id assignment + file_meta_map: dict[str, dict] = {} seen_unique_hashes: set[str] = set() for file_info in files: @@ -568,8 +599,8 @@ async def index_local_folder( file_path_abs = file_info["path"] unique_identifier = f"{folder_name}:{relative_path}" - unique_identifier_hash = generate_unique_identifier_hash( - DocumentType.LOCAL_FOLDER_FILE, + unique_identifier_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, unique_identifier, search_space_id, ) @@ -612,94 +643,42 @@ async def index_local_folder( continue await create_version_snapshot(session, existing_document) + else: + try: + content, content_hash = await _compute_file_content_hash( + file_path_abs, file_info["relative_path"], search_space_id + ) + except Exception as read_err: + logger.warning(f"Could not read {file_path_abs}: {read_err}") + skipped_count += 1 + continue - files_to_process.append( - { - "document": existing_document, - "is_new": False, - "file_info": file_info, - "content": content, - "content_hash": content_hash, - "unique_identifier_hash": unique_identifier_hash, - "relative_path": relative_path, - "title": file_info["name"], - } - ) - continue + if not content.strip(): + skipped_count += 1 + continue - try: - content, content_hash = await _compute_file_content_hash( - file_path_abs, file_info["relative_path"], search_space_id - ) - except Exception as read_err: - logger.warning(f"Could not read {file_path_abs}: {read_err}") - skipped_count += 1 - continue - - if not content.strip(): - skipped_count += 1 - continue - - with session.no_autoflush: - dup = await check_duplicate_document_by_hash(session, content_hash) - if dup: - duplicate_count += 1 - skipped_count += 1 - continue - - parent_dir = str(Path(relative_path).parent) - if parent_dir == ".": - parent_dir = "" - folder_id = folder_mapping.get(parent_dir, folder_mapping.get("")) - - document = Document( - search_space_id=search_space_id, + doc = _build_connector_doc( title=file_info["name"], - document_type=DocumentType.LOCAL_FOLDER_FILE, - document_metadata={ - "folder_name": folder_name, - "file_path": relative_path, - "mtime": file_info["modified_at"].timestamp(), - }, - content="Pending...", - content_hash=unique_identifier_hash, - unique_identifier_hash=unique_identifier_hash, - embedding=None, - status=DocumentStatus.pending(), - updated_at=get_current_timestamp(), - created_by_id=user_id, - connector_id=None, - folder_id=folder_id, - ) - session.add(document) - new_documents_created = True - - files_to_process.append( - { - "document": document, - "is_new": True, - "file_info": file_info, - "content": content, - "content_hash": content_hash, - "unique_identifier_hash": unique_identifier_hash, - "relative_path": relative_path, - "title": file_info["name"], - } + content=content, + relative_path=relative_path, + folder_name=folder_name, + search_space_id=search_space_id, + user_id=user_id, + enable_summary=enable_summary, ) + connector_docs.append(doc) + file_meta_map[unique_identifier] = { + "relative_path": relative_path, + "mtime": file_info["modified_at"].timestamp(), + } except Exception as e: logger.exception(f"Phase 1 error for {file_info.get('path')}: {e}") failed_count += 1 - if new_documents_created: - await session.commit() - # ================================================================ # PHASE 1.5: Delete documents no longer on disk # ================================================================ - # Collect ALL folder IDs under this root (including folders that no - # longer exist on disk but still have rows in the DB) so we catch - # documents in deleted directories too. all_root_folder_ids = set(folder_mapping.values()) all_db_folders = ( await session.execute( @@ -727,98 +706,51 @@ async def index_local_folder( await session.flush() # ================================================================ - # PHASE 2: Process each document + # PHASE 2: Index via unified pipeline # ================================================================ - long_context_llm = await get_user_long_context_llm( - session, user_id, search_space_id - ) + if connector_docs: + from app.indexing_pipeline.document_hashing import ( + compute_unique_identifier_hash, + ) - for item in files_to_process: - if on_heartbeat_callback: - current_time = time.time() - if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS: + pipeline = IndexingPipelineService(session) + doc_map = { + compute_unique_identifier_hash(cd): cd for cd in connector_docs + } + documents = await pipeline.prepare_for_indexing(connector_docs) + + llm = await get_user_long_context_llm(session, user_id, search_space_id) + + for document in documents: + connector_doc = doc_map.get(document.unique_identifier_hash) + if connector_doc is None: + failed_count += 1 + continue + + result = await pipeline.index(document, connector_doc, llm) + + if DocumentStatus.is_state(result.status, DocumentStatus.READY): + indexed_count += 1 + + # Assign folder_id and mtime post-pipeline + rel_path = (connector_doc.metadata or {}).get("file_path", "") + parent_dir = str(Path(rel_path).parent) if rel_path else "" + if parent_dir == ".": + parent_dir = "" + fid = folder_mapping.get(parent_dir, folder_mapping.get("")) + + unique_id = connector_doc.unique_id + mtime_info = file_meta_map.get(unique_id, {}) + + result.folder_id = fid + doc_meta = dict(result.document_metadata or {}) + doc_meta["mtime"] = mtime_info.get("mtime") + result.document_metadata = doc_meta + else: + failed_count += 1 + + if on_heartbeat_callback and indexed_count % 5 == 0: await on_heartbeat_callback(indexed_count) - last_heartbeat_time = current_time - - document = item["document"] - try: - document.status = DocumentStatus.processing() - await session.commit() - - title = item["title"] - relative_path = item["relative_path"] - content = item["content"] - content_hash = item["content_hash"] - file_info = item["file_info"] - - metadata_sections = [ - ( - "METADATA", - [ - f"Title: {title}", - f"Folder: {folder_name}", - f"Path: {relative_path}", - ], - ), - ("CONTENT", [content]), - ] - document_string = build_document_metadata_string(metadata_sections) - - summary_content = "" - if long_context_llm and enable_summary: - doc_meta = { - "folder_name": folder_name, - "file_path": relative_path, - } - summary_content, _ = await generate_document_summary( - document_string, long_context_llm, doc_meta - ) - - embedding = embed_text(document_string) - chunks = await create_document_chunks(document_string) - - parent_dir = str(Path(relative_path).parent) - if parent_dir == ".": - parent_dir = "" - folder_id = folder_mapping.get(parent_dir, folder_mapping.get("")) - - document.title = title - document.content = document_string - document.content_hash = content_hash - document.source_markdown = content - document.embedding = embedding - document.document_metadata = { - "folder_name": folder_name, - "file_path": relative_path, - "summary": summary_content, - "mtime": file_info["modified_at"].timestamp(), - } - document.folder_id = folder_id - await safe_set_chunks(session, document, chunks) - document.updated_at = get_current_timestamp() - document.status = DocumentStatus.ready() - - indexed_count += 1 - - if indexed_count % 10 == 0: - await session.commit() - - except Exception as e: - logger.exception(f"Phase 2 error for {item.get('relative_path')}: {e}") - try: - await session.rollback() - except Exception: - pass - try: - document.status = DocumentStatus.failed(str(e)[:500]) - document.updated_at = get_current_timestamp() - await session.commit() - except Exception: - try: - await session.rollback() - except Exception: - pass - failed_count += 1 # Cleanup empty folders existing_dirs = set() @@ -846,8 +778,6 @@ async def index_local_folder( raise warning_parts = [] - if duplicate_count > 0: - warning_parts.append(f"{duplicate_count} duplicate") if failed_count > 0: warning_parts.append(f"{failed_count} failed") warning_message = ", ".join(warning_parts) if warning_parts else None @@ -859,7 +789,6 @@ async def index_local_folder( "indexed": indexed_count, "skipped": skipped_count, "failed": failed_count, - "duplicates": duplicate_count, }, ) @@ -899,8 +828,8 @@ async def _index_single_file( if not full_path.exists(): rel = str(full_path.relative_to(folder_path)) unique_id = f"{folder_name}:{rel}" - uid_hash = generate_unique_identifier_hash( - DocumentType.LOCAL_FOLDER_FILE, unique_id, search_space_id + uid_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, unique_id, search_space_id ) existing = await check_document_by_unique_identifier(session, uid_hash) if existing: @@ -918,8 +847,8 @@ async def _index_single_file( rel_path = str(full_path.relative_to(folder_path)) unique_id = f"{folder_name}:{rel_path}" - uid_hash = generate_unique_identifier_hash( - DocumentType.LOCAL_FOLDER_FILE, unique_id, search_space_id + uid_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, unique_id, search_space_id ) try: @@ -945,83 +874,51 @@ async def _index_single_file( await create_version_snapshot(session, existing) - long_context_llm = await get_user_long_context_llm( - session, user_id, search_space_id - ) - - title = full_path.name mtime = full_path.stat().st_mtime - metadata_sections = [ - ("METADATA", [f"Title: {title}", f"Folder: {folder_name}", f"Path: {rel_path}"]), - ("CONTENT", [content]), - ] - document_string = build_document_metadata_string(metadata_sections) + connector_doc = _build_connector_doc( + title=full_path.name, + content=content, + relative_path=rel_path, + folder_name=folder_name, + search_space_id=search_space_id, + user_id=user_id, + enable_summary=enable_summary, + ) - summary_content = "" - if long_context_llm and enable_summary: - summary_content, _ = await generate_document_summary( - document_string, long_context_llm, {"folder_name": folder_name, "file_path": rel_path} - ) + pipeline = IndexingPipelineService(session) + llm = await get_user_long_context_llm(session, user_id, search_space_id) + documents = await pipeline.prepare_for_indexing([connector_doc]) - embedding = embed_text(document_string) - chunks = await create_document_chunks(document_string) + if not documents: + return 0, 1, None - doc_metadata = { - "folder_name": folder_name, - "file_path": rel_path, - "summary": summary_content, - "mtime": mtime, - } + db_doc = documents[0] + await pipeline.index(db_doc, connector_doc, llm) + # Post-pipeline: assign folder_id and mtime + await session.refresh(db_doc) folder_id = None if root_folder_id: folder_id = await _resolve_folder_for_file( session, rel_path, root_folder_id, search_space_id, user_id ) - - if existing: - existing.title = title - existing.content = document_string - existing.content_hash = content_hash - existing.source_markdown = content - existing.embedding = embedding - existing.document_metadata = doc_metadata - existing.folder_id = folder_id - await safe_set_chunks(session, existing, chunks) - existing.updated_at = get_current_timestamp() - existing.status = DocumentStatus.ready() - else: - document = Document( - search_space_id=search_space_id, - title=title, - document_type=DocumentType.LOCAL_FOLDER_FILE, - document_metadata=doc_metadata, - content=document_string, - content_hash=content_hash, - unique_identifier_hash=uid_hash, - source_markdown=content, - embedding=embedding, - status=DocumentStatus.ready(), - updated_at=get_current_timestamp(), - created_by_id=user_id, - connector_id=None, - folder_id=folder_id, - ) - session.add(document) - await session.flush() - for chunk in chunks: - chunk.document_id = document.id - session.add_all(chunks) - + db_doc.folder_id = folder_id + doc_meta = dict(db_doc.document_metadata or {}) + doc_meta["mtime"] = mtime + db_doc.document_metadata = doc_meta await session.commit() - await task_logger.log_task_success( - log_entry, - f"Single file indexed: {rel_path}", - {"file": rel_path}, - ) - return 1, 0, None + indexed = 1 if DocumentStatus.is_state(db_doc.status, DocumentStatus.READY) else 0 + failed_msg = None if indexed else "Indexing failed" + + if indexed: + await task_logger.log_task_success( + log_entry, + f"Single file indexed: {rel_path}", + {"file": rel_path}, + ) + return indexed, 0 if indexed else 1, failed_msg except Exception as e: logger.exception(f"Error indexing single file {target_file_path}: {e}") diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py index 110aa6caf..154cc6e0e 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py @@ -1,8 +1,7 @@ -"""Integration tests for local folder indexer — Tier 3 (I1-I5), Tier 4 (F1-F5), Tier 5 (P1).""" +"""Integration tests for local folder indexer — Tier 3 (I1-I5), Tier 4 (F1-F7), Tier 5 (P1).""" import os from pathlib import Path -from unittest.mock import AsyncMock, MagicMock import pytest from sqlalchemy import func, select @@ -18,41 +17,11 @@ from app.db import ( User, ) -import app.tasks.connector_indexers.local_folder_indexer as _lfi_mod - pytestmark = pytest.mark.integration - -@pytest.fixture -def patched_self_hosted(monkeypatch): - _cfg = type("_Cfg", (), {"is_self_hosted": staticmethod(lambda: True)})() - monkeypatch.setattr(_lfi_mod, "config", _cfg) - - -@pytest.fixture -def patched_embed_for_indexer(monkeypatch): - from app.config import config as app_config - dim = app_config.embedding_model_instance.dimension - mock = MagicMock(return_value=[0.1] * dim) - monkeypatch.setattr(_lfi_mod, "embed_text", mock) - return mock - - -@pytest.fixture -def patched_chunks_for_indexer(monkeypatch): - from app.db import Chunk - from app.config import config as app_config - dim = app_config.embedding_model_instance.dimension - - async def mock_create_chunks(text): - return [Chunk(content="chunk", embedding=[0.1] * dim)] - - monkeypatch.setattr(_lfi_mod, "create_document_chunks", mock_create_chunks) - - -@pytest.fixture -def patched_summary_for_indexer(monkeypatch): - monkeypatch.setattr(_lfi_mod, "get_user_long_context_llm", AsyncMock(return_value=None)) +UNIFIED_FIXTURES = ( + "patched_summarize", "patched_embed_texts", "patched_chunk_text", +) # ==================================================================== @@ -62,12 +31,7 @@ def patched_summary_for_indexer(monkeypatch): class TestFullIndexer: - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_i1_new_file_indexed( self, db_session: AsyncSession, @@ -103,12 +67,7 @@ class TestFullIndexer: assert docs[0].document_type == DocumentType.LOCAL_FOLDER_FILE assert DocumentStatus.is_state(docs[0].status, DocumentStatus.READY) - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_i2_unchanged_skipped( self, db_session: AsyncSession, @@ -130,7 +89,6 @@ class TestFullIndexer: ) assert count1 == 1 - # Second run — unchanged, pass root_folder_id from first run count2, _, _, _ = await index_local_folder( session=db_session, search_space_id=db_search_space.id, @@ -151,12 +109,7 @@ class TestFullIndexer: ).scalar_one() assert total == 1 - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_i3_changed_reindexed( self, db_session: AsyncSession, @@ -178,9 +131,7 @@ class TestFullIndexer: folder_name="test-folder", ) - # Modify f.write_text("# Version 2\n\nUpdated.") - # Touch mtime to ensure it's detected as different os.utime(f, (f.stat().st_atime + 10, f.stat().st_mtime + 10)) count, _, _, _ = await index_local_folder( @@ -193,7 +144,6 @@ class TestFullIndexer: ) assert count == 1 - # Should have a version snapshot versions = ( await db_session.execute( select(DocumentVersion).join(Document).where( @@ -204,12 +154,7 @@ class TestFullIndexer: ).scalars().all() assert len(versions) >= 1 - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_i4_deleted_removed( self, db_session: AsyncSession, @@ -262,12 +207,7 @@ class TestFullIndexer: ).scalar_one() assert docs_after == 0 - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_i5_single_file_mode( self, db_session: AsyncSession, @@ -305,18 +245,13 @@ class TestFullIndexer: # ==================================================================== -# Tier 4: Folder Mirroring (F1-F5) +# Tier 4: Folder Mirroring (F1-F7) # ==================================================================== class TestFolderMirroring: - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_f1_root_folder_created( self, db_session: AsyncSession, @@ -344,12 +279,7 @@ class TestFolderMirroring: ).scalar_one() assert root_folder.name == "test-folder" - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_f2_nested_folder_rows( self, db_session: AsyncSession, @@ -393,12 +323,7 @@ class TestFolderMirroring: assert daily_folder.parent_id == notes_folder.id assert weekly_folder.parent_id == notes_folder.id - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_f3_resync_reuses_folders( self, db_session: AsyncSession, @@ -428,7 +353,6 @@ class TestFolderMirroring: ).scalars().all() ids_before = {f.id for f in folders_before} - # Re-sync with root_folder_id from first run await index_local_folder( session=db_session, search_space_id=db_search_space.id, @@ -447,12 +371,7 @@ class TestFolderMirroring: assert ids_before == ids_after - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_f4_folder_id_assigned( self, db_session: AsyncSession, @@ -496,15 +415,9 @@ class TestFolderMirroring: assert today_doc.folder_id == daily_folder.id - # Root doc should be in the root folder assert root_doc.folder_id == root_folder_id - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_f5_empty_folder_cleanup( self, db_session: AsyncSession, @@ -531,7 +444,6 @@ class TestFolderMirroring: folder_name="test-folder", ) - # Verify weekly folder exists weekly_folder = ( await db_session.execute( select(Folder).where(Folder.name == "weekly") @@ -539,7 +451,6 @@ class TestFolderMirroring: ).scalar_one_or_none() assert weekly_folder is not None - # Delete weekly directory + its file shutil.rmtree(weekly) await index_local_folder( @@ -551,7 +462,6 @@ class TestFolderMirroring: root_folder_id=root_folder_id, ) - # weekly Folder should be gone (empty, dir removed) weekly_after = ( await db_session.execute( select(Folder).where(Folder.name == "weekly") @@ -559,7 +469,6 @@ class TestFolderMirroring: ).scalar_one_or_none() assert weekly_after is None - # daily should still exist daily_after = ( await db_session.execute( select(Folder).where(Folder.name == "daily") @@ -567,12 +476,7 @@ class TestFolderMirroring: ).scalar_one_or_none() assert daily_after is not None - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_f6_single_file_creates_subfolder( self, db_session: AsyncSession, @@ -634,12 +538,7 @@ class TestFolderMirroring: assert daily_folder.parent_id == notes_folder.id assert notes_folder.parent_id == root_folder_id - @pytest.mark.usefixtures( - "patched_self_hosted", - "patched_embed_for_indexer", - "patched_chunks_for_indexer", - "patched_summary_for_indexer", - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_f7_single_file_delete_cleans_empty_folders( self, db_session: AsyncSession, @@ -705,9 +604,7 @@ class TestFolderMirroring: class TestPipelineIntegration: - @pytest.mark.usefixtures( - "patched_summarize", "patched_embed_texts", "patched_chunk_text" - ) + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_p1_local_folder_file_through_pipeline( self, db_session: AsyncSession, From 25358fddcf17ff41c2f02a534bf0218eef96701f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 2 Apr 2026 23:46:21 +0530 Subject: [PATCH 31/61] feat: implement local folder synchronization and versioning with new metadata handling and document_versions table --- ...7_add_local_folder_sync_and_versioning.py} | 25 +++++- surfsense_backend/app/db.py | 1 + .../app/routes/documents_routes.py | 42 ++++++++++ .../app/routes/folders_routes.py | 27 +++++++ surfsense_backend/app/schemas/folders.py | 3 + .../components/documents/FolderNode.tsx | 24 ++++-- .../components/documents/FolderTreeView.tsx | 11 ++- .../layout/ui/sidebar/DocumentsSidebar.tsx | 79 +++++++++++++++++-- surfsense_web/contracts/types/folder.types.ts | 1 + .../lib/apis/documents-api.service.ts | 5 ++ surfsense_web/lib/apis/folders-api.service.ts | 4 + 11 files changed, 205 insertions(+), 17 deletions(-) rename surfsense_backend/alembic/versions/{117_add_local_folder_connector_and_versioning.py => 117_add_local_folder_sync_and_versioning.py} (82%) diff --git a/surfsense_backend/alembic/versions/117_add_local_folder_connector_and_versioning.py b/surfsense_backend/alembic/versions/117_add_local_folder_sync_and_versioning.py similarity index 82% rename from surfsense_backend/alembic/versions/117_add_local_folder_connector_and_versioning.py rename to surfsense_backend/alembic/versions/117_add_local_folder_sync_and_versioning.py index a9da3beb4..e322a608d 100644 --- a/surfsense_backend/alembic/versions/117_add_local_folder_connector_and_versioning.py +++ b/surfsense_backend/alembic/versions/117_add_local_folder_sync_and_versioning.py @@ -1,4 +1,4 @@ -"""Add LOCAL_FOLDER_FILE document type and document_versions table +"""Add LOCAL_FOLDER_FILE document type, folder metadata, and document_versions table Revision ID: 117 Revises: 116 @@ -38,6 +38,19 @@ def upgrade() -> None: """ ) + # Add JSONB metadata column to folders table + col_exists = conn.execute( + sa.text( + "SELECT 1 FROM information_schema.columns " + "WHERE table_name = 'folders' AND column_name = 'metadata'" + ) + ).fetchone() + if not col_exists: + op.add_column( + "folders", + sa.Column("metadata", sa.dialects.postgresql.JSONB, nullable=True), + ) + # Create document_versions table table_exists = conn.execute( sa.text( @@ -124,3 +137,13 @@ def downgrade() -> None: op.execute("DROP INDEX IF EXISTS ix_document_versions_created_at") op.execute("DROP INDEX IF EXISTS ix_document_versions_document_id") op.execute("DROP TABLE IF EXISTS document_versions") + + # Drop metadata column from folders + col_exists = conn.execute( + sa.text( + "SELECT 1 FROM information_schema.columns " + "WHERE table_name = 'folders' AND column_name = 'metadata'" + ) + ).fetchone() + if col_exists: + op.drop_column("folders", "metadata") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 1a4d3ea06..077b7daa6 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -956,6 +956,7 @@ class Folder(BaseModel, TimestampMixin): onupdate=lambda: datetime.now(UTC), index=True, ) + folder_metadata = Column("metadata", JSONB, nullable=True) parent = relationship("Folder", remote_side="Folder.id", backref="children") search_space = relationship("SearchSpace", back_populates="folders") diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 05221b192..81bbb1477 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -1310,6 +1310,13 @@ async def folder_index( "You don't have permission to create documents in this search space", ) + watched_metadata = { + "watched": True, + "folder_path": request.folder_path, + "exclude_patterns": request.exclude_patterns, + "file_extensions": request.file_extensions, + } + root_folder_id = request.root_folder_id if root_folder_id: existing = ( @@ -1319,6 +1326,9 @@ async def folder_index( ).scalar_one_or_none() if not existing: root_folder_id = None + else: + existing.folder_metadata = watched_metadata + await session.commit() if not root_folder_id: root_folder = Folder( @@ -1326,6 +1336,7 @@ async def folder_index( search_space_id=request.search_space_id, created_by_id=str(user.id), position="a0", + folder_metadata=watched_metadata, ) session.add(root_folder) await session.flush() @@ -1403,3 +1414,34 @@ async def folder_index_file( "message": "File indexing started", "status": "processing", } + + +@router.get("/documents/watched-folders", response_model=list["FolderRead"]) +async def get_watched_folders( + search_space_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Return root folders that are marked as watched (metadata->>'watched' = 'true').""" + from app.schemas import FolderRead # noqa: F811 + + await check_permission( + session, + user, + search_space_id, + Permission.DOCUMENTS_READ.value, + "You don't have permission to read documents in this search space", + ) + + folders = ( + await session.execute( + select(Folder).where( + Folder.search_space_id == search_space_id, + Folder.parent_id.is_(None), + Folder.folder_metadata.isnot(None), + Folder.folder_metadata["watched"].astext == "true", + ) + ) + ).scalars().all() + + return folders diff --git a/surfsense_backend/app/routes/folders_routes.py b/surfsense_backend/app/routes/folders_routes.py index d688e692a..6e524d4a4 100644 --- a/surfsense_backend/app/routes/folders_routes.py +++ b/surfsense_backend/app/routes/folders_routes.py @@ -192,6 +192,33 @@ async def get_folder_breadcrumb( ) from e +@router.patch("/folders/{folder_id}/watched") +async def stop_watching_folder( + folder_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Clear the watched flag from a folder's metadata.""" + folder = await session.get(Folder, folder_id) + if not folder: + raise HTTPException(status_code=404, detail="Folder not found") + + await check_permission( + session, + user, + folder.search_space_id, + Permission.DOCUMENTS_UPDATE.value, + "You don't have permission to update folders in this search space", + ) + + if folder.folder_metadata and isinstance(folder.folder_metadata, dict): + updated = {**folder.folder_metadata, "watched": False} + folder.folder_metadata = updated + await session.commit() + + return {"message": "Folder watch status updated"} + + @router.put("/folders/{folder_id}", response_model=FolderRead) async def update_folder( folder_id: int, diff --git a/surfsense_backend/app/schemas/folders.py b/surfsense_backend/app/schemas/folders.py index 263817182..e8bdf3821 100644 --- a/surfsense_backend/app/schemas/folders.py +++ b/surfsense_backend/app/schemas/folders.py @@ -3,6 +3,8 @@ from datetime import datetime from uuid import UUID +from typing import Any + from pydantic import BaseModel, ConfigDict, Field @@ -34,6 +36,7 @@ class FolderRead(BaseModel): created_by_id: UUID | None created_at: datetime updated_at: datetime + metadata: dict[str, Any] | None = Field(default=None, validation_alias="folder_metadata") model_config = ConfigDict(from_attributes=True) diff --git a/surfsense_web/components/documents/FolderNode.tsx b/surfsense_web/components/documents/FolderNode.tsx index 1521c06fe..6780bd1e5 100644 --- a/surfsense_web/components/documents/FolderNode.tsx +++ b/surfsense_web/components/documents/FolderNode.tsx @@ -76,6 +76,7 @@ interface FolderNodeProps { isWatched?: boolean; onRescan?: (folder: FolderDisplay) => void; onStopWatching?: (folder: FolderDisplay) => void; + onViewMetadata?: (folder: FolderDisplay) => void; } function getDropZone( @@ -116,6 +117,7 @@ export const FolderNode = React.memo(function FolderNode({ isWatched, onRescan, onStopWatching, + onViewMetadata, }: FolderNodeProps) { const [renameValue, setRenameValue] = useState(folder.name); const inputRef = useRef(null); @@ -251,13 +253,21 @@ export const FolderNode = React.memo(function FolderNode({ isOver && !canDrop && "cursor-not-allowed" )} style={{ paddingLeft: `${depth * 16 + 4}px` }} - onClick={() => onToggleExpand(folder.id)} - onKeyDown={(e) => { - if (e.key === "Enter" || e.key === " ") { - e.preventDefault(); - onToggleExpand(folder.id); - } - }} + onClick={(e) => { + if ((e.ctrlKey || e.metaKey) && onViewMetadata) { + e.preventDefault(); + e.stopPropagation(); + onViewMetadata(folder); + return; + } + onToggleExpand(folder.id); + }} + onKeyDown={(e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + onToggleExpand(folder.id); + } + }} onDoubleClick={(e) => { e.stopPropagation(); startRename(); diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index 5945edccb..f34b9a0c2 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -43,6 +43,7 @@ interface FolderTreeViewProps { watchedFolderIds?: Set; onRescanFolder?: (folder: FolderDisplay) => void; onStopWatchingFolder?: (folder: FolderDisplay) => void; + onViewFolderMetadata?: (folder: FolderDisplay) => void; } function groupBy(items: T[], keyFn: (item: T) => string | number): Record { @@ -79,6 +80,7 @@ export function FolderTreeView({ watchedFolderIds, onRescanFolder, onStopWatchingFolder, + onViewFolderMetadata, }: FolderTreeViewProps) { const foldersByParent = useMemo(() => groupBy(folders, (f) => f.parentId ?? "root"), [folders]); @@ -210,10 +212,11 @@ export function FolderTreeView({ siblingPositions={siblingPositions} contextMenuOpen={openContextMenuId === `folder-${f.id}`} onContextMenuOpenChange={(open) => setOpenContextMenuId(open ? `folder-${f.id}` : null)} - isWatched={watchedFolderIds?.has(f.id)} - onRescan={onRescanFolder} - onStopWatching={onStopWatchingFolder} - /> + isWatched={watchedFolderIds?.has(f.id)} + onRescan={onRescanFolder} + onStopWatching={onStopWatchingFolder} + onViewMetadata={onViewFolderMetadata} + /> ); if (isExpanded) { diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index 202d170d9..f9d32bf98 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -21,6 +21,7 @@ import type { DocumentNodeDoc } from "@/components/documents/DocumentNode"; import type { FolderDisplay } from "@/components/documents/FolderNode"; import { FolderPickerDialog } from "@/components/documents/FolderPickerDialog"; import { FolderTreeView } from "@/components/documents/FolderTreeView"; +import { JsonMetadataViewer } from "@/components/json-metadata-viewer"; import { EXPORT_FILE_EXTENSIONS } from "@/components/shared/ExportMenuItems"; import { AlertDialog, @@ -95,12 +96,46 @@ export function DocumentsSidebar({ const [activeTypes, setActiveTypes] = useState([]); const [watchedFolderIds, setWatchedFolderIds] = useState>(new Set()); + const [metadataFolder, setMetadataFolder] = useState(null); + const [metadataJson, setMetadataJson] = useState | null>(null); + const [metadataLoading, setMetadataLoading] = useState(false); + useEffect(() => { const api = typeof window !== "undefined" ? window.electronAPI : null; if (!api?.getWatchedFolders) return; async function loadWatchedIds() { const folders = await api!.getWatchedFolders(); + + if (folders.length === 0) { + try { + const backendFolders = await documentsApiService.getWatchedFolders(searchSpaceId); + for (const bf of backendFolders) { + const meta = bf.metadata as Record | null; + if (!meta?.watched || !meta.folder_path) continue; + await api!.addWatchedFolder({ + path: meta.folder_path as string, + name: bf.name, + rootFolderId: bf.id, + searchSpaceId: bf.search_space_id, + excludePatterns: (meta.exclude_patterns as string[]) ?? [], + fileExtensions: (meta.file_extensions as string[] | null) ?? null, + active: true, + }); + } + const recovered = await api!.getWatchedFolders(); + const ids = new Set( + recovered + .filter((f) => f.rootFolderId != null) + .map((f) => f.rootFolderId as number) + ); + setWatchedFolderIds(ids); + return; + } catch (err) { + console.error("[DocumentsSidebar] Recovery from backend failed:", err); + } + } + const ids = new Set( folders .filter((f) => f.rootFolderId != null) @@ -110,7 +145,7 @@ export function DocumentsSidebar({ } loadWatchedIds(); - }, []); + }, [searchSpaceId]); const { mutateAsync: deleteDocumentMutation } = useAtomValue(deleteDocumentMutationAtom); const [sidebarDocs, setSidebarDocs] = useAtom(sidebarSelectedDocumentsAtom); @@ -318,11 +353,30 @@ export function DocumentsSidebar({ } await api.removeWatchedFolder(matched.path); + try { + await foldersApiService.stopWatching(folder.id); + } catch (err) { + console.error("[DocumentsSidebar] Failed to clear watched metadata:", err); + } toast.success(`Stopped watching: ${matched.name}`); }, [] ); + const handleViewFolderMetadata = useCallback(async (folder: FolderDisplay) => { + setMetadataFolder(folder); + setMetadataLoading(true); + try { + const fullFolder = await foldersApiService.getFolder(folder.id); + setMetadataJson((fullFolder.metadata as Record) ?? {}); + } catch (err) { + console.error("[DocumentsSidebar] Failed to fetch folder metadata:", err); + setMetadataJson({ error: "Failed to load folder metadata" }); + } finally { + setMetadataLoading(false); + } + }, []); + const handleRenameFolder = useCallback(async (folder: FolderDisplay, newName: string) => { try { await foldersApiService.updateFolder(folder.id, { name: newName }); @@ -801,11 +855,26 @@ export function DocumentsSidebar({ onReorderFolder={handleReorderFolder} watchedFolderIds={watchedFolderIds} onRescanFolder={handleRescanFolder} - onStopWatchingFolder={handleStopWatching} - /> -
+ onStopWatchingFolder={handleStopWatching} + onViewFolderMetadata={handleViewFolderMetadata} + /> +
- { + if (!open) { + setMetadataFolder(null); + setMetadataJson(null); + setMetadataLoading(false); + } + }} + /> + + { + return baseApiService.get(`/api/v1/documents/watched-folders?search_space_id=${searchSpaceId}`, folderListResponse); + }; + /** * Delete a document */ diff --git a/surfsense_web/lib/apis/folders-api.service.ts b/surfsense_web/lib/apis/folders-api.service.ts index 99d9ad774..2e535d615 100644 --- a/surfsense_web/lib/apis/folders-api.service.ts +++ b/surfsense_web/lib/apis/folders-api.service.ts @@ -85,6 +85,10 @@ class FoldersApiService { return baseApiService.delete(`/api/v1/folders/${folderId}`, folderDeleteResponse); }; + stopWatching = async (folderId: number) => { + return baseApiService.patch(`/api/v1/folders/${folderId}/watched`, undefined); + }; + moveDocument = async (documentId: number, request: DocumentMoveRequest) => { const parsed = documentMoveRequest.safeParse(request); if (!parsed.success) { From f0a7c7134a7e81a8ee202e854afbfc98d9ad182a Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 00:09:17 +0530 Subject: [PATCH 32/61] feat: move endpoint above to retrieve watched folders based on search space ID --- .../app/routes/documents_routes.py | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 81bbb1477..0acc1d30b 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -953,6 +953,37 @@ async def get_document_by_chunk_id( ) from e +@router.get("/documents/watched-folders", response_model=list["FolderRead"]) +async def get_watched_folders( + search_space_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Return root folders that are marked as watched (metadata->>'watched' = 'true').""" + from app.schemas import FolderRead # noqa: F811 + + await check_permission( + session, + user, + search_space_id, + Permission.DOCUMENTS_READ.value, + "You don't have permission to read documents in this search space", + ) + + folders = ( + await session.execute( + select(Folder).where( + Folder.search_space_id == search_space_id, + Folder.parent_id.is_(None), + Folder.folder_metadata.isnot(None), + Folder.folder_metadata["watched"].astext == "true", + ) + ) + ).scalars().all() + + return folders + + @router.get("/documents/{document_id}", response_model=DocumentRead) async def read_document( document_id: int, @@ -1416,32 +1447,3 @@ async def folder_index_file( } -@router.get("/documents/watched-folders", response_model=list["FolderRead"]) -async def get_watched_folders( - search_space_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """Return root folders that are marked as watched (metadata->>'watched' = 'true').""" - from app.schemas import FolderRead # noqa: F811 - - await check_permission( - session, - user, - search_space_id, - Permission.DOCUMENTS_READ.value, - "You don't have permission to read documents in this search space", - ) - - folders = ( - await session.execute( - select(Folder).where( - Folder.search_space_id == search_space_id, - Folder.parent_id.is_(None), - Folder.folder_metadata.isnot(None), - Folder.folder_metadata["watched"].astext == "true", - ) - ) - ).scalars().all() - - return folders From b46c5532b3fb02c3fd7277021d128e4f2f8a3180 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 00:28:24 +0530 Subject: [PATCH 33/61] feat: add unified file and folder browsing functionality with IPC channel integration --- surfsense_desktop/src/ipc/channels.ts | 2 + surfsense_desktop/src/ipc/handlers.ts | 8 + .../src/modules/folder-watcher.ts | 68 +++++ surfsense_desktop/src/preload.ts | 4 + .../(manage)/components/DocumentsFilters.tsx | 23 +- .../layout/ui/sidebar/DocumentsSidebar.tsx | 51 +--- .../components/sources/DocumentUploadTab.tsx | 284 ++++++++++++++---- surfsense_web/types/window.d.ts | 15 + 8 files changed, 335 insertions(+), 120 deletions(-) diff --git a/surfsense_desktop/src/ipc/channels.ts b/surfsense_desktop/src/ipc/channels.ts index 66788d90e..19c26607d 100644 --- a/surfsense_desktop/src/ipc/channels.ts +++ b/surfsense_desktop/src/ipc/channels.ts @@ -17,4 +17,6 @@ export const IPC_CHANNELS = { FOLDER_SYNC_PAUSE: 'folder-sync:pause', FOLDER_SYNC_RESUME: 'folder-sync:resume', FOLDER_SYNC_RENDERER_READY: 'folder-sync:renderer-ready', + BROWSE_FILE_OR_FOLDER: 'browse:file-or-folder', + READ_LOCAL_FILES: 'browse:read-local-files', } as const; diff --git a/surfsense_desktop/src/ipc/handlers.ts b/surfsense_desktop/src/ipc/handlers.ts index 19051e871..246f0f6ac 100644 --- a/surfsense_desktop/src/ipc/handlers.ts +++ b/surfsense_desktop/src/ipc/handlers.ts @@ -9,6 +9,8 @@ import { pauseWatcher, resumeWatcher, markRendererReady, + browseFileOrFolder, + readLocalFiles, } from '../modules/folder-watcher'; export function registerIpcHandlers(): void { @@ -49,4 +51,10 @@ export function registerIpcHandlers(): void { ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_RENDERER_READY, () => { markRendererReady(); }); + + ipcMain.handle(IPC_CHANNELS.BROWSE_FILE_OR_FOLDER, () => browseFileOrFolder()); + + ipcMain.handle(IPC_CHANNELS.READ_LOCAL_FILES, (_event, paths: string[]) => + readLocalFiles(paths) + ); } diff --git a/surfsense_desktop/src/modules/folder-watcher.ts b/surfsense_desktop/src/modules/folder-watcher.ts index 81a835c22..1324858a0 100644 --- a/surfsense_desktop/src/modules/folder-watcher.ts +++ b/surfsense_desktop/src/modules/folder-watcher.ts @@ -391,3 +391,71 @@ export async function unregisterFolderWatcher(): Promise { } watchers.clear(); } + +export interface BrowseResult { + type: 'files' | 'folder'; + paths: string[]; +} + +export async function browseFileOrFolder(): Promise { + const result = await dialog.showOpenDialog({ + properties: ['openFile', 'openDirectory', 'multiSelections'], + title: 'Select files or a folder', + }); + if (result.canceled || result.filePaths.length === 0) return null; + + const stat = fs.statSync(result.filePaths[0]); + if (stat.isDirectory()) { + return { type: 'folder', paths: [result.filePaths[0]] }; + } + return { type: 'files', paths: result.filePaths }; +} + +const MIME_MAP: Record = { + '.pdf': 'application/pdf', + '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + '.html': 'text/html', '.htm': 'text/html', + '.csv': 'text/csv', + '.txt': 'text/plain', + '.md': 'text/markdown', '.markdown': 'text/markdown', + '.mp3': 'audio/mpeg', '.mpeg': 'audio/mpeg', '.mpga': 'audio/mpeg', + '.mp4': 'audio/mp4', '.m4a': 'audio/mp4', + '.wav': 'audio/wav', + '.webm': 'audio/webm', + '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.bmp': 'image/bmp', + '.webp': 'image/webp', + '.tiff': 'image/tiff', + '.doc': 'application/msword', + '.rtf': 'application/rtf', + '.xml': 'application/xml', + '.epub': 'application/epub+zip', + '.xls': 'application/vnd.ms-excel', + '.ppt': 'application/vnd.ms-powerpoint', + '.eml': 'message/rfc822', + '.odt': 'application/vnd.oasis.opendocument.text', + '.msg': 'application/vnd.ms-outlook', +}; + +export interface LocalFileData { + name: string; + data: ArrayBuffer; + mimeType: string; + size: number; +} + +export function readLocalFiles(filePaths: string[]): LocalFileData[] { + return filePaths.map((p) => { + const buf = fs.readFileSync(p); + const ext = path.extname(p).toLowerCase(); + return { + name: path.basename(p), + data: buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength), + mimeType: MIME_MAP[ext] || 'application/octet-stream', + size: buf.byteLength, + }; + }); +} diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index 7c190db10..08ca87f8f 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -45,4 +45,8 @@ contextBridge.exposeInMainWorld('electronAPI', { pauseWatcher: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_PAUSE), resumeWatcher: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_RESUME), signalRendererReady: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_RENDERER_READY), + + // Unified browse (files + folders) + browseFileOrFolder: () => ipcRenderer.invoke(IPC_CHANNELS.BROWSE_FILE_OR_FOLDER), + readLocalFiles: (paths: string[]) => ipcRenderer.invoke(IPC_CHANNELS.READ_LOCAL_FILES, paths), }); diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx index fcd3a39da..150c119de 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters.tsx @@ -1,6 +1,6 @@ "use client"; -import { Eye, FolderPlus, ListFilter, Search, Upload, X } from "lucide-react"; +import { FolderPlus, ListFilter, Search, Upload, X } from "lucide-react"; import { useTranslations } from "next-intl"; import React, { useCallback, useMemo, useRef, useState } from "react"; import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup"; @@ -19,7 +19,6 @@ export function DocumentsFilters({ onToggleType, activeTypes, onCreateFolder, - onWatchFolder, }: { typeCounts: Partial>; onSearch: (v: string) => void; @@ -27,7 +26,6 @@ export function DocumentsFilters({ onToggleType: (type: DocumentTypeEnum, checked: boolean) => void; activeTypes: DocumentTypeEnum[]; onCreateFolder?: () => void; - onWatchFolder?: () => void; }) { const t = useTranslations("documents"); const id = React.useId(); @@ -216,24 +214,7 @@ export function DocumentsFilters({ )} - {/* Watch Folder Button (desktop only) */} - {onWatchFolder && ( - - - - - Watch folder - - )} - - {/* Upload Button */} + {/* Upload Button */} + ) : ( + )} +
+ )} +
+ + + + {selectedFolder && ( + + +
+
+ +
+ + {selectedFolder.name} + + + {selectedFolder.path} +
- )} +
+
+
+ +
+ + +
+ + + +
+ )} {files.length > 0 && ( diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index b399664d6..826a575c7 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -26,6 +26,18 @@ interface FolderSyncWatcherReadyEvent { folderPath: string; } +interface BrowseResult { + type: "files" | "folder"; + paths: string[]; +} + +interface LocalFileData { + name: string; + data: ArrayBuffer; + mimeType: string; + size: number; +} + interface ElectronAPI { versions: { electron: string; @@ -51,6 +63,9 @@ interface ElectronAPI { pauseWatcher: () => Promise; resumeWatcher: () => Promise; signalRendererReady: () => Promise; + // Unified browse + browseFileOrFolder: () => Promise; + readLocalFiles: (paths: string[]) => Promise; } declare global { From e0b35cfbabe43add555771e19166558376b30ff7 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 00:40:49 +0530 Subject: [PATCH 34/61] feat: implement pending file event handling using durable queue with acknowledgment support in folder synchronization --- surfsense_desktop/src/ipc/channels.ts | 2 + surfsense_desktop/src/ipc/handlers.ts | 10 ++ .../src/modules/folder-watcher.ts | 111 +++++++++++++++--- surfsense_desktop/src/preload.ts | 2 + surfsense_web/hooks/use-folder-sync.ts | 82 +++++++++---- surfsense_web/types/window.d.ts | 3 + 6 files changed, 175 insertions(+), 35 deletions(-) diff --git a/surfsense_desktop/src/ipc/channels.ts b/surfsense_desktop/src/ipc/channels.ts index 19c26607d..2761960f7 100644 --- a/surfsense_desktop/src/ipc/channels.ts +++ b/surfsense_desktop/src/ipc/channels.ts @@ -17,6 +17,8 @@ export const IPC_CHANNELS = { FOLDER_SYNC_PAUSE: 'folder-sync:pause', FOLDER_SYNC_RESUME: 'folder-sync:resume', FOLDER_SYNC_RENDERER_READY: 'folder-sync:renderer-ready', + FOLDER_SYNC_GET_PENDING_EVENTS: 'folder-sync:get-pending-events', + FOLDER_SYNC_ACK_EVENTS: 'folder-sync:ack-events', BROWSE_FILE_OR_FOLDER: 'browse:file-or-folder', READ_LOCAL_FILES: 'browse:read-local-files', } as const; diff --git a/surfsense_desktop/src/ipc/handlers.ts b/surfsense_desktop/src/ipc/handlers.ts index 246f0f6ac..7194aaaff 100644 --- a/surfsense_desktop/src/ipc/handlers.ts +++ b/surfsense_desktop/src/ipc/handlers.ts @@ -6,6 +6,8 @@ import { removeWatchedFolder, getWatchedFolders, getWatcherStatus, + getPendingFileEvents, + acknowledgeFileEvents, pauseWatcher, resumeWatcher, markRendererReady, @@ -52,6 +54,14 @@ export function registerIpcHandlers(): void { markRendererReady(); }); + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_GET_PENDING_EVENTS, () => + getPendingFileEvents() + ); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_ACK_EVENTS, (_event, eventIds: string[]) => + acknowledgeFileEvents(eventIds) + ); + ipcMain.handle(IPC_CHANNELS.BROWSE_FILE_OR_FOLDER, () => browseFileOrFolder()); ipcMain.handle(IPC_CHANNELS.READ_LOCAL_FILES, (_event, paths: string[]) => diff --git a/surfsense_desktop/src/modules/folder-watcher.ts b/surfsense_desktop/src/modules/folder-watcher.ts index 1324858a0..9cbdd9775 100644 --- a/surfsense_desktop/src/modules/folder-watcher.ts +++ b/surfsense_desktop/src/modules/folder-watcher.ts @@ -1,5 +1,6 @@ import { BrowserWindow, dialog } from 'electron'; import chokidar, { type FSWatcher } from 'chokidar'; +import { randomUUID } from 'crypto'; import * as path from 'path'; import * as fs from 'fs'; import { IPC_CHANNELS } from '../ipc/channels'; @@ -20,12 +21,27 @@ interface WatcherEntry { } type MtimeMap = Record; +type FolderSyncAction = 'add' | 'change' | 'unlink'; + +export interface FolderSyncFileChangedEvent { + id: string; + rootFolderId: number | null; + searchSpaceId: number; + folderPath: string; + folderName: string; + relativePath: string; + fullPath: string; + action: FolderSyncAction; + timestamp: number; +} const STORE_KEY = 'watchedFolders'; +const OUTBOX_STORE_KEY = 'events'; const MTIME_TOLERANCE_S = 1.0; let store: any = null; let mtimeStore: any = null; +let outboxStore: any = null; let watchers: Map = new Map(); /** @@ -35,22 +51,11 @@ let watchers: Map = new Map(); const mtimeMaps: Map = new Map(); let rendererReady = false; -const pendingEvents: any[] = []; +const outboxEvents: Map = new Map(); +let outboxLoaded = false; export function markRendererReady() { rendererReady = true; - for (const event of pendingEvents) { - sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, event); - } - pendingEvents.length = 0; -} - -function sendFileChangedEvent(data: any) { - if (rendererReady) { - sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, data); - } else { - pendingEvents.push(data); - } } async function getStore() { @@ -77,6 +82,57 @@ async function getMtimeStore() { return mtimeStore; } +async function getOutboxStore() { + if (!outboxStore) { + const { default: Store } = await import('electron-store'); + outboxStore = new Store({ + name: 'folder-sync-outbox', + defaults: { + [OUTBOX_STORE_KEY]: [] as FolderSyncFileChangedEvent[], + }, + }); + } + return outboxStore; +} + +function makeEventKey(event: Pick): string { + return `${event.folderPath}:${event.relativePath}`; +} + +function persistOutbox() { + getOutboxStore().then((s) => { + s.set(OUTBOX_STORE_KEY, Array.from(outboxEvents.values())); + }); +} + +async function loadOutbox() { + if (outboxLoaded) return; + const s = await getOutboxStore(); + const stored: FolderSyncFileChangedEvent[] = s.get(OUTBOX_STORE_KEY, []); + outboxEvents.clear(); + for (const event of stored) { + if (!event?.id || !event.folderPath || !event.relativePath) continue; + outboxEvents.set(makeEventKey(event), event); + } + outboxLoaded = true; +} + +function sendFileChangedEvent( + data: Omit +) { + const event: FolderSyncFileChangedEvent = { + id: randomUUID(), + ...data, + }; + + outboxEvents.set(makeEventKey(event), event); + persistOutbox(); + + if (rendererReady) { + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, event); + } +} + function loadMtimeMap(folderPath: string): MtimeMap { return mtimeMaps.get(folderPath) ?? {}; } @@ -235,7 +291,7 @@ async function startWatcher(config: WatchedFolderConfig) { }); }); - const handleFileEvent = (filePath: string, action: string) => { + const handleFileEvent = (filePath: string, action: FolderSyncAction) => { if (!ready) return; const relativePath = path.relative(config.path, filePath); @@ -357,6 +413,32 @@ export async function getWatcherStatus(): Promise< })); } +export async function getPendingFileEvents(): Promise { + await loadOutbox(); + return Array.from(outboxEvents.values()).sort((a, b) => a.timestamp - b.timestamp); +} + +export async function acknowledgeFileEvents(eventIds: string[]): Promise<{ acknowledged: number }> { + if (!eventIds || eventIds.length === 0) return { acknowledged: 0 }; + await loadOutbox(); + + const ackSet = new Set(eventIds); + let acknowledged = 0; + + for (const [key, event] of outboxEvents.entries()) { + if (ackSet.has(event.id)) { + outboxEvents.delete(key); + acknowledged += 1; + } + } + + if (acknowledged > 0) { + persistOutbox(); + } + + return { acknowledged }; +} + export async function pauseWatcher(): Promise { for (const [, entry] of watchers) { if (entry.watcher) { @@ -375,6 +457,7 @@ export async function resumeWatcher(): Promise { } export async function registerFolderWatcher(): Promise { + await loadOutbox(); const s = await getStore(); const folders: WatchedFolderConfig[] = s.get(STORE_KEY, []); diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index 08ca87f8f..6a2610dc8 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -45,6 +45,8 @@ contextBridge.exposeInMainWorld('electronAPI', { pauseWatcher: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_PAUSE), resumeWatcher: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_RESUME), signalRendererReady: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_RENDERER_READY), + getPendingFileEvents: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_GET_PENDING_EVENTS), + acknowledgeFileEvents: (eventIds: string[]) => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_ACK_EVENTS, eventIds), // Unified browse (files + folders) browseFileOrFolder: () => ipcRenderer.invoke(IPC_CHANNELS.BROWSE_FILE_OR_FOLDER), diff --git a/surfsense_web/hooks/use-folder-sync.ts b/surfsense_web/hooks/use-folder-sync.ts index f051b7df6..59c061afb 100644 --- a/surfsense_web/hooks/use-folder-sync.ts +++ b/surfsense_web/hooks/use-folder-sync.ts @@ -4,6 +4,7 @@ import { useEffect, useRef } from "react"; import { documentsApiService } from "@/lib/apis/documents-api.service"; interface FileChangedEvent { + id: string; rootFolderId: number | null; searchSpaceId: number; folderPath: string; @@ -15,25 +16,35 @@ interface FileChangedEvent { } const DEBOUNCE_MS = 2000; +interface QueueItem { + event: FileChangedEvent; + ackIds: string[]; +} export function useFolderSync() { - const queueRef = useRef([]); + const queueRef = useRef([]); const processingRef = useRef(false); const debounceTimers = useRef>>(new Map()); + const pendingByKey = useRef>(new Map()); + const isMountedRef = useRef(false); async function processQueue() { if (processingRef.current) return; processingRef.current = true; while (queueRef.current.length > 0) { - const event = queueRef.current.shift()!; + const item = queueRef.current.shift()!; try { - await documentsApiService.folderIndexFile(event.searchSpaceId, { - folder_path: event.folderPath, - folder_name: event.folderName, - search_space_id: event.searchSpaceId, - target_file_path: event.fullPath, - root_folder_id: event.rootFolderId, + await documentsApiService.folderIndexFile(item.event.searchSpaceId, { + folder_path: item.event.folderPath, + folder_name: item.event.folderName, + search_space_id: item.event.searchSpaceId, + target_file_path: item.event.fullPath, + root_folder_id: item.event.rootFolderId, }); + const api = typeof window !== "undefined" ? window.electronAPI : null; + if (api?.acknowledgeFileEvents && item.ackIds.length > 0) { + await api.acknowledgeFileEvents(item.ackIds); + } } catch (err) { console.error("[FolderSync] Failed to trigger re-index:", err); } @@ -41,34 +52,63 @@ export function useFolderSync() { processingRef.current = false; } + function enqueueWithDebounce(event: FileChangedEvent) { + const key = `${event.folderPath}:${event.relativePath}`; + const existing = pendingByKey.current.get(key); + const ackSet = new Set(existing?.ackIds ?? []); + ackSet.add(event.id); + pendingByKey.current.set(key, { + event, + ackIds: Array.from(ackSet), + }); + + const existingTimeout = debounceTimers.current.get(key); + if (existingTimeout) clearTimeout(existingTimeout); + + const timeout = setTimeout(() => { + debounceTimers.current.delete(key); + const pending = pendingByKey.current.get(key); + if (!pending) return; + pendingByKey.current.delete(key); + queueRef.current.push(pending); + processQueue(); + }, DEBOUNCE_MS); + + debounceTimers.current.set(key, timeout); + } + useEffect(() => { + isMountedRef.current = true; const api = typeof window !== "undefined" ? window.electronAPI : null; - if (!api?.onFileChanged) return; + if (!api?.onFileChanged) { + return () => { + isMountedRef.current = false; + }; + } // Signal to main process that the renderer is ready to receive events api.signalRendererReady?.(); + // Drain durable outbox first so events survive renderer startup gaps and restarts + void api.getPendingFileEvents?.().then((pendingEvents) => { + if (!isMountedRef.current || !pendingEvents?.length) return; + for (const event of pendingEvents) { + enqueueWithDebounce(event); + } + }); + const cleanup = api.onFileChanged((event: FileChangedEvent) => { - const key = `${event.folderPath}:${event.fullPath}`; - - const existing = debounceTimers.current.get(key); - if (existing) clearTimeout(existing); - - const timeout = setTimeout(() => { - debounceTimers.current.delete(key); - queueRef.current.push(event); - processQueue(); - }, DEBOUNCE_MS); - - debounceTimers.current.set(key, timeout); + enqueueWithDebounce(event); }); return () => { + isMountedRef.current = false; cleanup(); for (const timeout of debounceTimers.current.values()) { clearTimeout(timeout); } debounceTimers.current.clear(); + pendingByKey.current.clear(); }; }, []); } diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index 826a575c7..719373e02 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -11,6 +11,7 @@ interface WatchedFolderConfig { } interface FolderSyncFileChangedEvent { + id: string; rootFolderId: number | null; searchSpaceId: number; folderPath: string; @@ -63,6 +64,8 @@ interface ElectronAPI { pauseWatcher: () => Promise; resumeWatcher: () => Promise; signalRendererReady: () => Promise; + getPendingFileEvents: () => Promise; + acknowledgeFileEvents: (eventIds: string[]) => Promise<{ acknowledged: number }>; // Unified browse browseFileOrFolder: () => Promise; readLocalFiles: (paths: string[]) => Promise; From 530db1053939cd69a6ce41107a6169babab1f707 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 02:56:24 +0530 Subject: [PATCH 35/61] refactor: remove unused Electron API check and update search space ID handling in document upload --- surfsense_desktop/src/ipc/channels.ts | 2 +- surfsense_desktop/src/ipc/handlers.ts | 4 +- .../src/modules/folder-watcher.ts | 18 +- surfsense_desktop/src/preload.ts | 4 +- .../layout/ui/sidebar/DocumentsSidebar.tsx | 2 - .../components/sources/DocumentUploadTab.tsx | 162 ++++++++++-------- surfsense_web/types/window.d.ts | 9 +- 7 files changed, 100 insertions(+), 101 deletions(-) diff --git a/surfsense_desktop/src/ipc/channels.ts b/surfsense_desktop/src/ipc/channels.ts index 2761960f7..2000964c7 100644 --- a/surfsense_desktop/src/ipc/channels.ts +++ b/surfsense_desktop/src/ipc/channels.ts @@ -19,6 +19,6 @@ export const IPC_CHANNELS = { FOLDER_SYNC_RENDERER_READY: 'folder-sync:renderer-ready', FOLDER_SYNC_GET_PENDING_EVENTS: 'folder-sync:get-pending-events', FOLDER_SYNC_ACK_EVENTS: 'folder-sync:ack-events', - BROWSE_FILE_OR_FOLDER: 'browse:file-or-folder', + BROWSE_FILES: 'browse:files', READ_LOCAL_FILES: 'browse:read-local-files', } as const; diff --git a/surfsense_desktop/src/ipc/handlers.ts b/surfsense_desktop/src/ipc/handlers.ts index 7194aaaff..c4251b30b 100644 --- a/surfsense_desktop/src/ipc/handlers.ts +++ b/surfsense_desktop/src/ipc/handlers.ts @@ -11,7 +11,7 @@ import { pauseWatcher, resumeWatcher, markRendererReady, - browseFileOrFolder, + browseFiles, readLocalFiles, } from '../modules/folder-watcher'; @@ -62,7 +62,7 @@ export function registerIpcHandlers(): void { acknowledgeFileEvents(eventIds) ); - ipcMain.handle(IPC_CHANNELS.BROWSE_FILE_OR_FOLDER, () => browseFileOrFolder()); + ipcMain.handle(IPC_CHANNELS.BROWSE_FILES, () => browseFiles()); ipcMain.handle(IPC_CHANNELS.READ_LOCAL_FILES, (_event, paths: string[]) => readLocalFiles(paths) diff --git a/surfsense_desktop/src/modules/folder-watcher.ts b/surfsense_desktop/src/modules/folder-watcher.ts index 9cbdd9775..969dabe97 100644 --- a/surfsense_desktop/src/modules/folder-watcher.ts +++ b/surfsense_desktop/src/modules/folder-watcher.ts @@ -475,23 +475,13 @@ export async function unregisterFolderWatcher(): Promise { watchers.clear(); } -export interface BrowseResult { - type: 'files' | 'folder'; - paths: string[]; -} - -export async function browseFileOrFolder(): Promise { +export async function browseFiles(): Promise { const result = await dialog.showOpenDialog({ - properties: ['openFile', 'openDirectory', 'multiSelections'], - title: 'Select files or a folder', + properties: ['openFile', 'multiSelections'], + title: 'Select files', }); if (result.canceled || result.filePaths.length === 0) return null; - - const stat = fs.statSync(result.filePaths[0]); - if (stat.isDirectory()) { - return { type: 'folder', paths: [result.filePaths[0]] }; - } - return { type: 'files', paths: result.filePaths }; + return result.filePaths; } const MIME_MAP: Record = { diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index 6a2610dc8..6fbfd354a 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -48,7 +48,7 @@ contextBridge.exposeInMainWorld('electronAPI', { getPendingFileEvents: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_GET_PENDING_EVENTS), acknowledgeFileEvents: (eventIds: string[]) => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_ACK_EVENTS, eventIds), - // Unified browse (files + folders) - browseFileOrFolder: () => ipcRenderer.invoke(IPC_CHANNELS.BROWSE_FILE_OR_FOLDER), + // Browse files via native dialog + browseFiles: () => ipcRenderer.invoke(IPC_CHANNELS.BROWSE_FILES), readLocalFiles: (paths: string[]) => ipcRenderer.invoke(IPC_CHANNELS.READ_LOCAL_FILES, paths), }); diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index ed3a78786..f8b774d26 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -277,8 +277,6 @@ export function DocumentsSidebar({ [createFolderParentId, searchSpaceId, setExpandedFolderMap] ); - const isElectron = typeof window !== "undefined" && !!window.electronAPI; - const handleRescanFolder = useCallback( async (folder: FolderDisplay) => { const api = window.electronAPI; diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx index 3fdf576b5..d5ac2770a 100644 --- a/surfsense_web/components/sources/DocumentUploadTab.tsx +++ b/surfsense_web/components/sources/DocumentUploadTab.tsx @@ -1,7 +1,7 @@ "use client"; import { useAtom } from "jotai"; -import { CheckCircle2, FileType, FolderOpen, Info, Upload, X } from "lucide-react"; +import { CheckCircle2, ChevronDown, File as FileIcon, FileType, FolderOpen, Info, Upload, X } from "lucide-react"; import { useTranslations } from "next-intl"; import { useCallback, useMemo, useRef, useState } from "react"; @@ -19,6 +19,12 @@ import { Alert, AlertDescription } from "@/components/ui/alert"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; import { Label } from "@/components/ui/label"; import { Progress } from "@/components/ui/progress"; import { Separator } from "@/components/ui/separator"; @@ -146,7 +152,7 @@ export function DocumentUploadTab({ const [selectedFolder, setSelectedFolder] = useState(null); const [watchFolder, setWatchFolder] = useState(true); const [folderSubmitting, setFolderSubmitting] = useState(false); - const isElectron = typeof window !== "undefined" && !!window.electronAPI?.browseFileOrFolder; + const isElectron = typeof window !== "undefined" && !!window.electronAPI?.browseFiles; const acceptedFileTypes = useMemo(() => { const etlService = process.env.NEXT_PUBLIC_ETL_SERVICE; @@ -193,7 +199,7 @@ export function DocumentUploadTab({ onDrop, accept: acceptedFileTypes, maxSize: 50 * 1024 * 1024, // 50MB per file - noClick: !isElectron, + noClick: isElectron, disabled: files.length >= MAX_FILES, }); @@ -201,52 +207,51 @@ export function DocumentUploadTab({ e.stopPropagation(); }, []); - const handleBrowse = useCallback(async (e: React.MouseEvent) => { - e.stopPropagation(); - e.preventDefault(); - + const handleBrowseFiles = useCallback(async () => { const api = window.electronAPI; - if (!api?.browseFileOrFolder) { - fileInputRef.current?.click(); - return; - } + if (!api?.browseFiles) return; - const result = await api.browseFileOrFolder(); - if (!result) return; + const paths = await api.browseFiles(); + if (!paths || paths.length === 0) return; - if (result.type === "folder") { - const folderPath = result.paths[0]; - const folderName = folderPath.split("/").pop() || folderPath.split("\\").pop() || folderPath; - setFiles([]); - setSelectedFolder({ path: folderPath, name: folderName }); - setWatchFolder(true); - } else { - setSelectedFolder(null); - const fileDataList = await api.readLocalFiles(result.paths); - const newFiles: FileWithId[] = fileDataList.map((fd) => ({ - id: crypto.randomUUID?.() ?? `file-${Date.now()}-${Math.random().toString(36)}`, - file: new File([fd.data], fd.name, { type: fd.mimeType }), - })); - setFiles((prev) => { - const merged = [...prev, ...newFiles]; - if (merged.length > MAX_FILES) { - toast.error(t("max_files_exceeded"), { - description: t("max_files_exceeded_desc", { max: MAX_FILES }), - }); - return prev; - } - const totalSize = merged.reduce((sum, e) => sum + e.file.size, 0); - if (totalSize > MAX_TOTAL_SIZE_BYTES) { - toast.error(t("max_size_exceeded"), { - description: t("max_size_exceeded_desc", { max: MAX_TOTAL_SIZE_MB }), - }); - return prev; - } - return merged; - }); - } + setSelectedFolder(null); + const fileDataList = await api.readLocalFiles(paths); + const newFiles: FileWithId[] = fileDataList.map((fd) => ({ + id: crypto.randomUUID?.() ?? `file-${Date.now()}-${Math.random().toString(36)}`, + file: new File([fd.data], fd.name, { type: fd.mimeType }), + })); + setFiles((prev) => { + const merged = [...prev, ...newFiles]; + if (merged.length > MAX_FILES) { + toast.error(t("max_files_exceeded"), { + description: t("max_files_exceeded_desc", { max: MAX_FILES }), + }); + return prev; + } + const totalSize = merged.reduce((sum, e) => sum + e.file.size, 0); + if (totalSize > MAX_TOTAL_SIZE_BYTES) { + toast.error(t("max_size_exceeded"), { + description: t("max_size_exceeded_desc", { max: MAX_TOTAL_SIZE_MB }), + }); + return prev; + } + return merged; + }); }, [t]); + const handleBrowseFolder = useCallback(async () => { + const api = window.electronAPI; + if (!api?.selectFolder) return; + + const folderPath = await api.selectFolder(); + if (!folderPath) return; + + const folderName = folderPath.split("/").pop() || folderPath.split("\\").pop() || folderPath; + setFiles([]); + setSelectedFolder({ path: folderPath, name: folderName }); + setWatchFolder(true); + }, []); + const formatFileSize = (bytes: number) => { if (bytes === 0) return "0 Bytes"; const k = 1024; @@ -280,10 +285,11 @@ export function DocumentUploadTab({ setFolderSubmitting(true); try { - const result = await documentsApiService.folderIndex(Number(searchSpaceId), { + const numericSpaceId = Number(searchSpaceId); + const result = await documentsApiService.folderIndex(numericSpaceId, { folder_path: selectedFolder.path, folder_name: selectedFolder.name, - search_space_id: searchSpaceId, + search_space_id: numericSpaceId, enable_summary: shouldSummarize, }); @@ -409,33 +415,43 @@ export function DocumentUploadTab({ )}
)} - {!isFileCountLimitReached && ( -
- {isElectron ? ( - - ) : ( - - )} -
- )} + {!isFileCountLimitReached && ( +
+ {isElectron ? ( + + e.stopPropagation()}> + + + e.stopPropagation()}> + + + Files + + + + Folder + + + + ) : ( + + )} +
+ )} diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index 719373e02..0842ed655 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -27,11 +27,6 @@ interface FolderSyncWatcherReadyEvent { folderPath: string; } -interface BrowseResult { - type: "files" | "folder"; - paths: string[]; -} - interface LocalFileData { name: string; data: ArrayBuffer; @@ -66,8 +61,8 @@ interface ElectronAPI { signalRendererReady: () => Promise; getPendingFileEvents: () => Promise; acknowledgeFileEvents: (eventIds: string[]) => Promise<{ acknowledged: number }>; - // Unified browse - browseFileOrFolder: () => Promise; + // Browse files/folders via native dialogs + browseFiles: () => Promise; readLocalFiles: (paths: string[]) => Promise; } From bd21c2842dec8dfeae80a00a040c4b1513c1cf3d Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 04:14:09 +0530 Subject: [PATCH 36/61] feat: enhance document upload and folder synchronization UI with improved processing state indicators and responsive design adjustments --- .../app/routes/documents_routes.py | 5 +- .../assistant-ui/document-upload-popup.tsx | 29 +- .../components/documents/DocumentNode.tsx | 14 +- .../components/documents/FolderNode.tsx | 48 +- .../components/documents/FolderTreeView.tsx | 30 + .../components/sources/DocumentUploadTab.tsx | 514 +++++++++--------- .../contracts/enums/connectorIcons.tsx | 2 + 7 files changed, 359 insertions(+), 283 deletions(-) diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 0acc1d30b..edb01d4cc 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -29,6 +29,7 @@ from app.schemas import ( DocumentTitleSearchResponse, DocumentUpdate, DocumentWithChunksRead, + FolderRead, PaginatedResponse, ) from app.services.task_dispatcher import TaskDispatcher, get_task_dispatcher @@ -953,15 +954,13 @@ async def get_document_by_chunk_id( ) from e -@router.get("/documents/watched-folders", response_model=list["FolderRead"]) +@router.get("/documents/watched-folders", response_model=list[FolderRead]) async def get_watched_folders( search_space_id: int, session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), ): """Return root folders that are marked as watched (metadata->>'watched' = 'true').""" - from app.schemas import FolderRead # noqa: F811 - await check_permission( session, user, diff --git a/surfsense_web/components/assistant-ui/document-upload-popup.tsx b/surfsense_web/components/assistant-ui/document-upload-popup.tsx index 06b0d38e7..78600be47 100644 --- a/surfsense_web/components/assistant-ui/document-upload-popup.tsx +++ b/surfsense_web/components/assistant-ui/document-upload-popup.tsx @@ -125,29 +125,23 @@ const DocumentUploadPopupContent: FC<{ onPointerDownOutside={(e) => e.preventDefault()} onInteractOutside={(e) => e.preventDefault()} onEscapeKeyDown={(e) => e.preventDefault()} - className="select-none max-w-4xl w-[95vw] sm:w-full h-[calc(100dvh-2rem)] sm:h-[85vh] flex flex-col p-0 gap-0 overflow-hidden border border-border ring-0 bg-muted dark:bg-muted text-foreground [&>button]:right-3 sm:[&>button]:right-12 [&>button]:top-3 sm:[&>button]:top-10 [&>button]:opacity-80 hover:[&>button]:opacity-100 [&>button]:z-[100] [&>button_svg]:size-4 sm:[&>button_svg]:size-5" + className="select-none max-w-2xl w-[95vw] sm:w-[640px] h-[min(460px,75dvh)] sm:h-[min(520px,80vh)] flex flex-col p-0 gap-0 overflow-hidden border border-border ring-0 bg-muted dark:bg-muted text-foreground [&>button]:right-3 sm:[&>button]:right-6 [&>button]:top-3 sm:[&>button]:top-5 [&>button]:opacity-80 hover:[&>button]:opacity-100 [&>button]:z-[100] [&>button_svg]:size-4 sm:[&>button_svg]:size-5" > Upload Document - {/* Scrollable container for mobile */}
- {/* Header - scrolls with content on mobile */} -
- {/* Upload header */} -
-
-

- Upload Documents -

-

- Upload and sync your documents to your search space -

-
+
+
+

+ Upload Documents +

+

+ Upload and sync your documents to your search space +

- {/* Content */} -
+
{!isLoading && !hasDocumentSummaryLLM ? ( @@ -179,9 +173,6 @@ const DocumentUploadPopupContent: FC<{ )}
- - {/* Bottom fade shadow - hidden on very small screens */} -
); diff --git a/surfsense_web/components/documents/DocumentNode.tsx b/surfsense_web/components/documents/DocumentNode.tsx index 691a6eb0d..7a3b3e0ca 100644 --- a/surfsense_web/components/documents/DocumentNode.tsx +++ b/surfsense_web/components/documents/DocumentNode.tsx @@ -195,12 +195,14 @@ export const DocumentNode = React.memo(function DocumentNode({ {doc.title} - - {getDocumentTypeIcon( - doc.document_type as DocumentTypeEnum, - "h-3.5 w-3.5 text-muted-foreground" - )} - + {getDocumentTypeIcon(doc.document_type as DocumentTypeEnum, "h-3.5 w-3.5 text-muted-foreground") && ( + + {getDocumentTypeIcon( + doc.document_type as DocumentTypeEnum, + "h-3.5 w-3.5 text-muted-foreground" + )} + + )} diff --git a/surfsense_web/components/documents/FolderNode.tsx b/surfsense_web/components/documents/FolderNode.tsx index 6780bd1e5..41c1d8f73 100644 --- a/surfsense_web/components/documents/FolderNode.tsx +++ b/surfsense_web/components/documents/FolderNode.tsx @@ -1,6 +1,7 @@ "use client"; import { + AlertCircle, ChevronDown, ChevronRight, Eye, @@ -30,6 +31,8 @@ import { DropdownMenuItem, DropdownMenuTrigger, } from "@/components/ui/dropdown-menu"; +import { Spinner } from "@/components/ui/spinner"; +import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { cn } from "@/lib/utils"; import type { FolderSelectionState } from "./FolderTreeView"; @@ -55,6 +58,7 @@ interface FolderNodeProps { isRenaming: boolean; childCount: number; selectionState: FolderSelectionState; + processingState: "idle" | "processing" | "failed"; onToggleSelect: (folderId: number, selectAll: boolean) => void; onToggleExpand: (folderId: number) => void; onRename: (folder: FolderDisplay, newName: string) => void; @@ -100,6 +104,7 @@ export const FolderNode = React.memo(function FolderNode({ isRenaming, childCount, selectionState, + processingState, onToggleSelect, onToggleExpand, onRename, @@ -281,14 +286,41 @@ export const FolderNode = React.memo(function FolderNode({ )} - e.stopPropagation()} - className="h-3.5 w-3.5 shrink-0" - /> + {processingState !== "idle" && selectionState === "none" ? ( + <> + + + + {processingState === "processing" ? ( + + ) : ( + + )} + + + + {processingState === "processing" + ? "Syncing folder contents" + : "Some files failed to process"} + + + e.stopPropagation()} + className="h-3.5 w-3.5 shrink-0 hidden group-hover:flex" + /> + + ) : ( + e.stopPropagation()} + className="h-3.5 w-3.5 shrink-0" + /> + )} diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index f34b9a0c2..01af73edc 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -166,6 +166,35 @@ export function FolderTreeView({ return states; }, [folders, docsByFolder, foldersByParent, mentionedDocIds]); + const folderProcessingStates = useMemo(() => { + const states: Record = {}; + + function compute(folderId: number): { hasProcessing: boolean; hasFailed: boolean } { + const directDocs = docsByFolder[folderId] ?? []; + let hasProcessing = directDocs.some( + (d) => d.status?.state === "pending" || d.status?.state === "processing" + ); + let hasFailed = directDocs.some((d) => d.status?.state === "failed"); + + for (const child of foldersByParent[folderId] ?? []) { + const sub = compute(child.id); + hasProcessing = hasProcessing || sub.hasProcessing; + hasFailed = hasFailed || sub.hasFailed; + } + + if (hasProcessing) states[folderId] = "processing"; + else if (hasFailed) states[folderId] = "failed"; + else states[folderId] = "idle"; + + return { hasProcessing, hasFailed }; + } + + for (const f of folders) { + if (states[f.id] === undefined) compute(f.id); + } + return states; + }, [folders, docsByFolder, foldersByParent]); + function renderLevel(parentId: number | null, depth: number): React.ReactNode[] { const key = parentId ?? "root"; const childFolders = (foldersByParent[key] ?? []) @@ -199,6 +228,7 @@ export function FolderTreeView({ isRenaming={renamingFolderId === f.id} childCount={folderChildCounts[f.id] ?? 0} selectionState={folderSelectionStates[f.id] ?? "none"} + processingState={folderProcessingStates[f.id] ?? "idle"} onToggleSelect={onToggleFolderSelect} onToggleExpand={onToggleExpand} onRename={onRenameFolder} diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx index d5ac2770a..7176afae5 100644 --- a/surfsense_web/components/sources/DocumentUploadTab.tsx +++ b/surfsense_web/components/sources/DocumentUploadTab.tsx @@ -1,24 +1,21 @@ "use client"; import { useAtom } from "jotai"; -import { CheckCircle2, ChevronDown, File as FileIcon, FileType, FolderOpen, Info, Upload, X } from "lucide-react"; +import { CheckCircle2, ChevronDown, File as FileIcon, FileType, FolderOpen, Plus, Upload, X } from "lucide-react"; import { useTranslations } from "next-intl"; import { useCallback, useMemo, useRef, useState } from "react"; import { useDropzone } from "react-dropzone"; import { toast } from "sonner"; import { uploadDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms"; -import { SummaryConfig } from "@/components/assistant-ui/connector-popup/components/summary-config"; import { Accordion, AccordionContent, AccordionItem, AccordionTrigger, } from "@/components/ui/accordion"; -import { Alert, AlertDescription } from "@/components/ui/alert"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; import { DropdownMenu, DropdownMenuContent, @@ -27,7 +24,6 @@ import { } from "@/components/ui/dropdown-menu"; import { Label } from "@/components/ui/label"; import { Progress } from "@/components/ui/progress"; -import { Separator } from "@/components/ui/separator"; import { Spinner } from "@/components/ui/spinner"; import { Switch } from "@/components/ui/switch"; import { documentsApiService } from "@/lib/apis/documents-api.service"; @@ -36,7 +32,6 @@ import { trackDocumentUploadStarted, trackDocumentUploadSuccess, } from "@/lib/posthog/events"; -import { GridPattern } from "./GridPattern"; interface SelectedFolder { path: string; @@ -128,13 +123,12 @@ interface FileWithId { file: File; } -const cardClass = "border border-border bg-slate-400/5 dark:bg-white/5"; - -// Upload limits — files are sent in batches of 5 to avoid proxy timeouts const MAX_FILES = 50; const MAX_TOTAL_SIZE_MB = 200; const MAX_TOTAL_SIZE_BYTES = MAX_TOTAL_SIZE_MB * 1024 * 1024; +const toggleRowClass = "flex items-center justify-between rounded-lg bg-slate-400/5 dark:bg-white/5 p-3"; + export function DocumentUploadTab({ searchSpaceId, onSuccess, @@ -198,7 +192,7 @@ export function DocumentUploadTab({ const { getRootProps, getInputProps, isDragActive } = useDropzone({ onDrop, accept: acceptedFileTypes, - maxSize: 50 * 1024 * 1024, // 50MB per file + maxSize: 50 * 1024 * 1024, noClick: isElectron, disabled: files.length >= MAX_FILES, }); @@ -270,6 +264,8 @@ export function DocumentUploadTab({ (MAX_TOTAL_SIZE_BYTES - totalFileSize) / (1024 * 1024) ).toFixed(1); + const hasContent = files.length > 0 || selectedFolder !== null; + const handleAccordionChange = useCallback( (value: string) => { setAccordionValue(value); @@ -307,7 +303,7 @@ export function DocumentUploadTab({ }); toast.success(`Watching folder: ${selectedFolder.name}`); } else { - toast.success(`Indexing folder: ${selectedFolder.name}`); + toast.success(`Syncing folder: ${selectedFolder.name}`); } setSelectedFolder(null); @@ -355,139 +351,180 @@ export function DocumentUploadTab({ ); }; - return ( -
- - - - {t("file_size_limit")}{" "} - {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} - - + const renderBrowseButton = (options?: { compact?: boolean; fullWidth?: boolean }) => { + const { compact, fullWidth } = options ?? {}; + if (isFileCountLimitReached) return null; - -
- -
- -
- - {isFileCountLimitReached ? ( -
- -
-

- {t("file_limit_reached")} -

-

- {t("file_limit_reached_desc", { max: MAX_FILES })} -

+ const sizeClass = compact ? "h-7" : "h-8"; + const widthClass = fullWidth ? "w-full" : ""; + + if (isElectron) { + return ( + + e.stopPropagation()}> + + + e.stopPropagation()}> + + + Files + + + + Folder + + + + ); + } + + return ( + + ); + }; + + return ( +
+ {/* Hidden file input for mobile browse */} + + + {/* MOBILE DROP ZONE */} +
+ {hasContent ? ( + !selectedFolder && !isFileCountLimitReached && ( + isElectron ? ( +
+ {renderBrowseButton({ compact: true, fullWidth: true })}
-
- ) : isDragActive ? ( -
- -

{t("drop_files")}

-
- ) : ( -
- -
-

{t("drag_drop")}

-

{t("or_browse")}

-
- {files.length > 0 && ( -

- {t("remaining_capacity", { files: remainingFiles, sizeMB: remainingSizeMB })} -

- )} -
- )} - {!isFileCountLimitReached && ( -
- {isElectron ? ( - - e.stopPropagation()}> - - - e.stopPropagation()}> - - - Files - - - - Folder - - - ) : ( + ) + ) + ) : ( +
{ + if (!isElectron) fileInputRef.current?.click(); + }} + > + +
+

+ {isElectron ? "Select files or folder" : "Tap to select files"} +

+

+ {t("file_size_limit")}{" "} + {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} +

+
+ {isElectron && ( +
e.stopPropagation()}> + {renderBrowseButton({ fullWidth: true })} +
)}
)} -
- - +
- {selectedFolder && ( - - -
-
- -
- - {selectedFolder.name} - - - {selectedFolder.path} - -
+ {/* DESKTOP DROP ZONE */} +
+ {hasContent ? ( +
+ + + {isDragActive + ? t("drop_files") + : isFileCountLimitReached + ? t("file_limit_reached") + : t("remaining_capacity", { files: remainingFiles, sizeMB: remainingSizeMB })} + + {renderBrowseButton({ compact: true })} +
+ ) : isFileCountLimitReached ? ( +
+ +

{t("file_limit_reached")}

+

+ {t("file_limit_reached_desc", { max: MAX_FILES })} +

+
+ ) : isDragActive ? ( +
+ +

{t("drop_files")}

+
+ ) : ( +
+ +

{t("drag_drop")}

+

+ {t("file_size_limit")}{" "} + {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} +

+
{renderBrowseButton()}
+
+ )} +
+ + {/* FOLDER SELECTED */} + {selectedFolder && ( +
+
+ +
+

{selectedFolder.name}

+

{selectedFolder.path}

- - -
-
)} + {/* FILES SELECTED */} + {files.length > 0 && ( +
+
+

+ {t("selected_files", { count: files.length })} · {formatFileSize(totalFileSize)} +

+ +
+ +
+ {files.map((entry) => ( +
+ + {entry.file.name} + + {formatFileSize(entry.file.size)} + + +
+ ))} +
+ + {isUploading && ( +
+
+ {t("uploading_files")} + {Math.round(uploadProgress)}% +
+ +
+ )} + +
+
+

Enable AI Summary

+

+ Improves search quality but adds latency +

+
+ +
+ + +
+ )} + + {/* SUPPORTED FORMATS */} - - -
-
-
- {t("supported_file_types")} -
-
- {t("file_types_desc")} -
-
-
+ + + + {t("supported_file_types")} + - -
+ +
{supportedExtensions.map((ext) => ( - + {ext} ))} diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx index 2e609b060..ab71d58b5 100644 --- a/surfsense_web/contracts/enums/connectorIcons.tsx +++ b/surfsense_web/contracts/enums/connectorIcons.tsx @@ -126,6 +126,8 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas return ; case "DEEPEST": return ; + case "LOCAL_FOLDER_FILE": + return null; default: return ; } From 44e39792da6fc7a35edbe8ae1cb68807e4ac3b3f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 04:14:28 +0530 Subject: [PATCH 37/61] feat: assign folder_id to documents before indexing to ensure correct folder visibility during processing --- .../local_folder_indexer.py | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index a3281eaea..041df71fc 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -719,6 +719,21 @@ async def index_local_folder( } documents = await pipeline.prepare_for_indexing(connector_docs) + # Assign folder_id immediately so docs appear in the correct + # folder while still pending/processing (visible via Zero sync). + for document in documents: + cd = doc_map.get(document.unique_identifier_hash) + if cd is None: + continue + rel_path = (cd.metadata or {}).get("file_path", "") + parent_dir = str(Path(rel_path).parent) if rel_path else "" + if parent_dir == ".": + parent_dir = "" + document.folder_id = folder_mapping.get( + parent_dir, folder_mapping.get("") + ) + await session.commit() + llm = await get_user_long_context_llm(session, user_id, search_space_id) for document in documents: @@ -732,17 +747,9 @@ async def index_local_folder( if DocumentStatus.is_state(result.status, DocumentStatus.READY): indexed_count += 1 - # Assign folder_id and mtime post-pipeline - rel_path = (connector_doc.metadata or {}).get("file_path", "") - parent_dir = str(Path(rel_path).parent) if rel_path else "" - if parent_dir == ".": - parent_dir = "" - fid = folder_mapping.get(parent_dir, folder_mapping.get("")) - unique_id = connector_doc.unique_id mtime_info = file_meta_map.get(unique_id, {}) - result.folder_id = fid doc_meta = dict(result.document_metadata or {}) doc_meta["mtime"] = mtime_info.get("mtime") result.document_metadata = doc_meta @@ -894,16 +901,18 @@ async def _index_single_file( return 0, 1, None db_doc = documents[0] - await pipeline.index(db_doc, connector_doc, llm) - # Post-pipeline: assign folder_id and mtime - await session.refresh(db_doc) - folder_id = None + # Assign folder_id before indexing so the doc appears in the + # correct folder while still pending/processing. if root_folder_id: - folder_id = await _resolve_folder_for_file( + db_doc.folder_id = await _resolve_folder_for_file( session, rel_path, root_folder_id, search_space_id, user_id ) - db_doc.folder_id = folder_id + await session.commit() + + await pipeline.index(db_doc, connector_doc, llm) + + await session.refresh(db_doc) doc_meta = dict(db_doc.document_metadata or {}) doc_meta["mtime"] = mtime db_doc.document_metadata = doc_meta From fe7fcaae5dada13d12bf5883b5722c54dcd5f425 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 04:16:19 +0530 Subject: [PATCH 38/61] feat: update folder deletion process to queue document deletions first and handle folder cleanup in Celery task --- .../app/routes/folders_routes.py | 31 +++++++++---------- .../app/tasks/celery_tasks/document_tasks.py | 27 ++++++++++++---- .../layout/ui/sidebar/DocumentsSidebar.tsx | 7 ++++- 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/surfsense_backend/app/routes/folders_routes.py b/surfsense_backend/app/routes/folders_routes.py index 6e524d4a4..2dc9bceac 100644 --- a/surfsense_backend/app/routes/folders_routes.py +++ b/surfsense_backend/app/routes/folders_routes.py @@ -367,7 +367,7 @@ async def delete_folder( session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), ): - """Delete a folder and cascade-delete subfolders. Documents are async-deleted via Celery.""" + """Mark documents for deletion and dispatch Celery to delete docs first, then folders.""" try: folder = await session.get(Folder, folder_id) if not folder: @@ -399,30 +399,29 @@ async def delete_folder( ) await session.commit() - await session.execute(Folder.__table__.delete().where(Folder.id == folder_id)) - await session.commit() + try: + from app.tasks.celery_tasks.document_tasks import ( + delete_folder_documents_task, + ) - if document_ids: - try: - from app.tasks.celery_tasks.document_tasks import ( - delete_folder_documents_task, - ) - - delete_folder_documents_task.delay(document_ids) - except Exception as err: + delete_folder_documents_task.delay( + document_ids, folder_subtree_ids=list(subtree_ids) + ) + except Exception as err: + if document_ids: await session.execute( Document.__table__.update() .where(Document.id.in_(document_ids)) .values(status={"state": "ready"}) ) await session.commit() - raise HTTPException( - status_code=503, - detail="Folder deleted but document cleanup could not be queued. Documents have been restored.", - ) from err + raise HTTPException( + status_code=503, + detail="Could not queue folder deletion. Documents have been restored.", + ) from err return { - "message": "Folder deleted successfully", + "message": "Folder deletion started", "documents_queued_for_deletion": len(document_ids), } diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py index 110f3deee..4701d9911 100644 --- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py @@ -142,21 +142,30 @@ async def _delete_document_background(document_id: int) -> None: retry_backoff_max=300, max_retries=5, ) -def delete_folder_documents_task(self, document_ids: list[int]): - """Celery task to batch-delete documents orphaned by folder deletion.""" +def delete_folder_documents_task( + self, + document_ids: list[int], + folder_subtree_ids: list[int] | None = None, +): + """Celery task to delete documents first, then the folder rows.""" loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - loop.run_until_complete(_delete_folder_documents(document_ids)) + loop.run_until_complete( + _delete_folder_documents(document_ids, folder_subtree_ids) + ) finally: loop.close() -async def _delete_folder_documents(document_ids: list[int]) -> None: - """Delete chunks in batches, then document rows for each orphaned document.""" +async def _delete_folder_documents( + document_ids: list[int], + folder_subtree_ids: list[int] | None = None, +) -> None: + """Delete chunks in batches, then document rows, then folder rows.""" from sqlalchemy import delete as sa_delete, select - from app.db import Chunk, Document + from app.db import Chunk, Document, Folder async with get_celery_session_maker()() as session: batch_size = 500 @@ -178,6 +187,12 @@ async def _delete_folder_documents(document_ids: list[int]) -> None: await session.delete(doc) await session.commit() + if folder_subtree_ids: + await session.execute( + sa_delete(Folder).where(Folder.id.in_(folder_subtree_ids)) + ) + await session.commit() + @celery_app.task( name="delete_search_space_background", diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index f8b774d26..8dce68eeb 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -188,7 +188,12 @@ export function DocumentsSidebar({ const treeDocuments: DocumentNodeDoc[] = useMemo(() => { const zeroDocs = (zeroAllDocs ?? []) - .filter((d) => d.title && d.title.trim() !== "") + .filter((d) => { + if (!d.title || d.title.trim() === "") return false; + const state = (d.status as { state?: string } | undefined)?.state; + if (state === "deleting") return false; + return true; + }) .map((d) => ({ id: d.id, title: d.title, From 9a65163fe4d89d35e6891015c1b46b555f11f321 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 09:20:44 +0530 Subject: [PATCH 39/61] feat: improve DocumentUploadTab UI with updated styles, enhanced file size limit messages --- .../components/sources/DocumentUploadTab.tsx | 89 ++++++++++--------- surfsense_web/messages/en.json | 4 +- surfsense_web/messages/es.json | 4 +- surfsense_web/messages/hi.json | 4 +- surfsense_web/messages/pt.json | 4 +- surfsense_web/messages/zh.json | 4 +- 6 files changed, 55 insertions(+), 54 deletions(-) diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx index 7176afae5..9a32e5a59 100644 --- a/surfsense_web/components/sources/DocumentUploadTab.tsx +++ b/surfsense_web/components/sources/DocumentUploadTab.tsx @@ -1,7 +1,7 @@ "use client"; import { useAtom } from "jotai"; -import { CheckCircle2, ChevronDown, File as FileIcon, FileType, FolderOpen, Plus, Upload, X } from "lucide-react"; +import { ChevronDown, Dot, File as FileIcon, FolderOpen, Upload, X } from "lucide-react"; import { useTranslations } from "next-intl"; import { useCallback, useMemo, useRef, useState } from "react"; @@ -22,7 +22,6 @@ import { DropdownMenuItem, DropdownMenuTrigger, } from "@/components/ui/dropdown-menu"; -import { Label } from "@/components/ui/label"; import { Progress } from "@/components/ui/progress"; import { Spinner } from "@/components/ui/spinner"; import { Switch } from "@/components/ui/switch"; @@ -362,12 +361,12 @@ export function DocumentUploadTab({ return ( e.stopPropagation()}> - - e.stopPropagation()}> + e.stopPropagation()}> Files @@ -416,32 +415,31 @@ export function DocumentUploadTab({ {renderBrowseButton({ compact: true, fullWidth: true })}
) : ( - + ) ) ) : (
{ if (!isElectron) fileInputRef.current?.click(); }} > - -
-

+ +

+

{isElectron ? "Select files or folder" : "Tap to select files"}

-

- {t("file_size_limit")}{" "} - {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} +

+ {t("file_size_limit")} + + {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })}

{isElectron && ( @@ -491,9 +489,10 @@ export function DocumentUploadTab({

{t("drag_drop")}

-

- {t("file_size_limit")}{" "} - {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} +

+ {t("file_size_limit")} + + {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })}

{renderBrowseButton()}
@@ -520,28 +519,29 @@ export function DocumentUploadTab({
-
- - -
- -
-
-

Enable AI Summary

-

- Improves search quality but adds latency -

+
+
+
+

Watch folder

+

+ Auto-sync when files change +

+
+ +
+
+
+

Enable AI Summary

+

+ Improves search quality but adds latency +

+
+
-
- - - - Version History - + + + Version History - - + + ); } +export function VersionHistoryDialog({ + open, + onOpenChange, + documentId, +}: { + open: boolean; + onOpenChange: (open: boolean) => void; + documentId: number; +}) { + return ( + + + Version History + {open && } + + + ); +} + +function formatRelativeTime(dateStr: string): string { + const now = Date.now(); + const then = new Date(dateStr).getTime(); + const diffMs = now - then; + const diffMin = Math.floor(diffMs / 60_000); + if (diffMin < 1) return "Just now"; + if (diffMin < 60) return `${diffMin} minute${diffMin !== 1 ? "s" : ""} ago`; + const diffHr = Math.floor(diffMin / 60); + if (diffHr < 24) return `${diffHr} hour${diffHr !== 1 ? "s" : ""} ago`; + return new Date(dateStr).toLocaleDateString(undefined, { + weekday: "short", + month: "short", + day: "numeric", + year: "numeric", + hour: "numeric", + minute: "2-digit", + }); +} + function VersionHistoryPanel({ documentId }: { documentId: number }) { const [versions, setVersions] = useState([]); const [loading, setLoading] = useState(true); @@ -55,6 +100,7 @@ function VersionHistoryPanel({ documentId }: { documentId: number }) { const [versionContent, setVersionContent] = useState(""); const [contentLoading, setContentLoading] = useState(false); const [restoring, setRestoring] = useState(false); + const [copied, setCopied] = useState(false); const loadVersions = useCallback(async () => { setLoading(true); @@ -73,6 +119,7 @@ function VersionHistoryPanel({ documentId }: { documentId: number }) { }, [loadVersions]); const handleSelectVersion = async (versionNumber: number) => { + if (selectedVersion === versionNumber) return; setSelectedVersion(versionNumber); setContentLoading(true); try { @@ -101,9 +148,15 @@ function VersionHistoryPanel({ documentId }: { documentId: number }) { } }; + const handleCopy = () => { + navigator.clipboard.writeText(versionContent); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + }; + if (loading) { return ( -
+
); @@ -111,75 +164,111 @@ function VersionHistoryPanel({ documentId }: { documentId: number }) { if (versions.length === 0) { return ( -
- +

No version history available yet.

Versions are created when file content changes.

); } - return ( -
-
- {versions.map((v) => ( -
handleSelectVersion(v.version_number)} - > -
-
-

Version {v.version_number}

- {v.created_at && ( -

- {new Date(v.created_at).toLocaleString()} -

- )} - {v.title && ( -

- {v.title} -

- )} -
- -
-
- ))} -
+ const selectedVersionData = versions.find((v) => v.version_number === selectedVersion); - {selectedVersion !== null && ( -
-

- Preview — Version {selectedVersion} -

- {contentLoading ? ( -
- -
- ) : ( -
-							{versionContent || "(empty)"}
-						
- )} + return ( + <> + {/* Left panel — version list */} +
+
+
+ {versions.map((v) => ( + + ))} +
+
+ + + {/* Right panel — content preview */} +
+ {selectedVersion !== null && selectedVersionData ? ( + <> +
+

+ {selectedVersionData.title || `Version ${selectedVersion}`} +

+
+ + +
+
+ +
+ {contentLoading ? ( +
+ +
+ ) : ( +
+									{versionContent || "(empty)"}
+								
+ )} +
+ + ) : ( +
+

Select a version to preview

+
+ )} +
+ ); } diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index 7aa518361..031390c9e 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -21,6 +21,7 @@ import type { DocumentNodeDoc } from "@/components/documents/DocumentNode"; import type { FolderDisplay } from "@/components/documents/FolderNode"; import { FolderPickerDialog } from "@/components/documents/FolderPickerDialog"; import { FolderTreeView } from "@/components/documents/FolderTreeView"; +import { VersionHistoryDialog } from "@/components/documents/version-history"; import { JsonMetadataViewer } from "@/components/json-metadata-viewer"; import { EXPORT_FILE_EXTENSIONS } from "@/components/shared/ExportMenuItems"; import { @@ -579,6 +580,7 @@ export function DocumentsSidebar({ const [bulkDeleteConfirmOpen, setBulkDeleteConfirmOpen] = useState(false); const [isBulkDeleting, setIsBulkDeleting] = useState(false); + const [versionDocId, setVersionDocId] = useState(null); const handleBulkDeleteSelected = useCallback(async () => { if (deletableSelectedIds.length === 0) return; @@ -826,6 +828,7 @@ export function DocumentsSidebar({ onDeleteDocument={(doc) => handleDeleteDocument(doc.id)} onMoveDocument={handleMoveDocument} onExportDocument={handleExportDocument} + onVersionHistory={(doc) => setVersionDocId(doc.id)} activeTypes={activeTypes} onDropIntoFolder={handleDropIntoFolder} onReorderFolder={handleReorderFolder} @@ -850,6 +853,14 @@ export function DocumentsSidebar({ }} /> + {versionDocId !== null && ( + { if (!open) setVersionDocId(null); }} + documentId={versionDocId} + /> + )} + Date: Fri, 3 Apr 2026 10:56:43 +0530 Subject: [PATCH 45/61] fix: update version history messages for clarity by removing unnecessary punctuation --- surfsense_web/components/documents/version-history.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_web/components/documents/version-history.tsx b/surfsense_web/components/documents/version-history.tsx index f438a7190..7aba92b47 100644 --- a/surfsense_web/components/documents/version-history.tsx +++ b/surfsense_web/components/documents/version-history.tsx @@ -165,8 +165,8 @@ function VersionHistoryPanel({ documentId }: { documentId: number }) { if (versions.length === 0) { return (
-

No version history available yet.

-

Versions are created when file content changes.

+

No version history available yet

+

Versions are created when file content changes

); } From 79f19b9bc637d13bf89a8c08ae2e815123093491 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 11:10:46 +0530 Subject: [PATCH 46/61] fix: adjust layout in DocumentsSidebar for improved UI responsiveness and interaction, including changes to button styles and positioning --- .../components/layout/ui/sidebar/DocumentsSidebar.tsx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index 031390c9e..24f6666c9 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -784,12 +784,13 @@ export function DocumentsSidebar({ />
+
{deletableSelectedIds.length > 0 && ( -
+
)} - +
Date: Fri, 3 Apr 2026 11:19:54 +0530 Subject: [PATCH 47/61] feat: trigger document reindexing after restoring a document version to ensure content is up-to-date --- surfsense_backend/app/routes/documents_routes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index e6eed7836..083ed2b89 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -1285,6 +1285,9 @@ async def restore_document_version( document.content_needs_reindexing = True await session.commit() + from app.tasks.celery_tasks.document_reindex_tasks import reindex_document_task + reindex_document_task.delay(document_id, str(user.id)) + return { "message": f"Restored version {version_number}", "document_id": document_id, From 3833084dad3444d7bf534a7b39dc62ebf24a89fd Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 11:45:53 +0530 Subject: [PATCH 48/61] feat: changed the revision number of folder alembic migration --- ...ing.py => 118_add_local_folder_sync_and_versioning.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename surfsense_backend/alembic/versions/{117_add_local_folder_sync_and_versioning.py => 118_add_local_folder_sync_and_versioning.py} (98%) diff --git a/surfsense_backend/alembic/versions/117_add_local_folder_sync_and_versioning.py b/surfsense_backend/alembic/versions/118_add_local_folder_sync_and_versioning.py similarity index 98% rename from surfsense_backend/alembic/versions/117_add_local_folder_sync_and_versioning.py rename to surfsense_backend/alembic/versions/118_add_local_folder_sync_and_versioning.py index e322a608d..1fef9fbcb 100644 --- a/surfsense_backend/alembic/versions/117_add_local_folder_sync_and_versioning.py +++ b/surfsense_backend/alembic/versions/118_add_local_folder_sync_and_versioning.py @@ -1,7 +1,7 @@ """Add LOCAL_FOLDER_FILE document type, folder metadata, and document_versions table -Revision ID: 117 -Revises: 116 +Revision ID: 118 +Revises: 117 """ from collections.abc import Sequence @@ -10,8 +10,8 @@ import sqlalchemy as sa from alembic import op -revision: str = "117" -down_revision: str | None = "116" +revision: str = "118" +down_revision: str | None = "117" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None From 388811194e720a28dce7a92e47757d01f9f2820f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 12:33:47 +0530 Subject: [PATCH 49/61] feat: update DocumentUploadTab to use a dropdown for file and folder selection, enhancing user experience; also update upload limits and file size messages in multiple languages --- .../components/sources/DocumentUploadTab.tsx | 73 ++++++------------- surfsense_web/messages/en.json | 15 +++- surfsense_web/messages/es.json | 15 +++- surfsense_web/messages/hi.json | 15 +++- surfsense_web/messages/pt.json | 15 +++- surfsense_web/messages/zh.json | 17 +++-- 6 files changed, 80 insertions(+), 70 deletions(-) diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx index f3b9166dc..9733bd2e6 100644 --- a/surfsense_web/components/sources/DocumentUploadTab.tsx +++ b/surfsense_web/components/sources/DocumentUploadTab.tsx @@ -413,18 +413,24 @@ export function DocumentUploadTab({ } return ( - + + e.stopPropagation()}> + + + e.stopPropagation()}> + fileInputRef.current?.click()}> + + {t("browse_files")} + + folderInputRef.current?.click()}> + + {t("browse_folder")} + + + ); }; @@ -476,7 +482,7 @@ export function DocumentUploadTab({

- {isElectron ? "Select files or folder" : "Tap to select files"} + {isElectron ? "Select files or folder" : "Tap to select files or folder"}

{t("file_size_limit")} @@ -484,40 +490,9 @@ export function DocumentUploadTab({ {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })}

- {isElectron && ( -
e.stopPropagation()}> - {renderBrowseButton({ fullWidth: true })} -
- )} - {!isElectron && ( -
- - -
- )} +
e.stopPropagation()}> + {renderBrowseButton({ fullWidth: true })} +
)}
@@ -570,8 +545,8 @@ export function DocumentUploadTab({ )}
- {/* FOLDER SELECTED */} - {selectedFolder && ( + {/* FOLDER SELECTED (Electron only — web flattens folder contents into file list) */} + {isElectron && selectedFolder && (
diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json index 47d08e921..3a8c0c632 100644 --- a/surfsense_web/messages/en.json +++ b/surfsense_web/messages/en.json @@ -376,11 +376,11 @@ "upload_documents": { "title": "Upload Documents", "subtitle": "Upload your files to make them searchable and accessible through AI-powered conversations.", - "file_size_limit": "Maximum file size: {maxMB}MB per file", - "upload_limits": "Upload files or entire folders", + "file_size_limit": "Maximum file size: 50MB per file", + "upload_limits": "Upload limit: {maxFiles} files, {maxSizeMB}MB total", "drop_files": "Drop files or folders here", "drag_drop": "Drag & drop files or folders here", - "or_browse": "or click to browse files and folders", + "or_browse": "or click to browse", "browse_files": "Browse Files", "browse_folder": "Browse Folder", "selected_files": "Selected Files ({count})", @@ -397,7 +397,14 @@ "file_types_desc": "These file types are supported based on your current ETL service configuration.", "file_too_large": "File Too Large", "file_too_large_desc": "\"{name}\" exceeds the {maxMB}MB per-file limit.", - "no_supported_files_in_folder": "No supported file types found in the selected folder." + "no_supported_files_in_folder": "No supported file types found in the selected folder.", + "remaining_capacity": "{files} files, {sizeMB}MB remaining", + "file_limit_reached": "File limit reached", + "file_limit_reached_desc": "Maximum of {max} files allowed", + "max_files_exceeded": "Too many files", + "max_files_exceeded_desc": "You can upload a maximum of {max} files at once", + "max_size_exceeded": "Total size exceeded", + "max_size_exceeded_desc": "Total upload size cannot exceed {max}MB" }, "add_webpage": { "title": "Add Webpages for Crawling", diff --git a/surfsense_web/messages/es.json b/surfsense_web/messages/es.json index e7761ba25..2de30d29d 100644 --- a/surfsense_web/messages/es.json +++ b/surfsense_web/messages/es.json @@ -376,11 +376,11 @@ "upload_documents": { "title": "Subir documentos", "subtitle": "Sube tus archivos para hacerlos buscables y accesibles a través de conversaciones con IA.", - "file_size_limit": "Tamaño máximo de archivo: {maxMB} MB por archivo", - "upload_limits": "Sube archivos o carpetas enteras", + "file_size_limit": "Tamaño máximo de archivo: 50 MB por archivo", + "upload_limits": "Límite de subida: {maxFiles} archivos, {maxSizeMB} MB en total", "drop_files": "Suelta archivos o carpetas aquí", "drag_drop": "Arrastra y suelta archivos o carpetas aquí", - "or_browse": "o haz clic para explorar archivos y carpetas", + "or_browse": "o haz clic para explorar", "browse_files": "Explorar archivos", "browse_folder": "Explorar carpeta", "selected_files": "Archivos seleccionados ({count})", @@ -397,7 +397,14 @@ "file_types_desc": "Estos tipos de archivo son soportados según la configuración actual de tu servicio ETL.", "file_too_large": "Archivo demasiado grande", "file_too_large_desc": "\"{name}\" excede el límite de {maxMB} MB por archivo.", - "no_supported_files_in_folder": "No se encontraron tipos de archivo compatibles en la carpeta seleccionada." + "no_supported_files_in_folder": "No se encontraron tipos de archivo compatibles en la carpeta seleccionada.", + "remaining_capacity": "{files} archivos, {sizeMB}MB restante", + "file_limit_reached": "Límite de archivos alcanzado", + "file_limit_reached_desc": "Máximo de {max} archivos permitidos", + "max_files_exceeded": "Demasiados archivos", + "max_files_exceeded_desc": "Puedes subir un máximo de {max} archivos a la vez", + "max_size_exceeded": "Tamaño total excedido", + "max_size_exceeded_desc": "El tamaño total de subida no puede exceder {max}MB" }, "add_webpage": { "title": "Agregar páginas web para rastreo", diff --git a/surfsense_web/messages/hi.json b/surfsense_web/messages/hi.json index 957533206..c27291e3b 100644 --- a/surfsense_web/messages/hi.json +++ b/surfsense_web/messages/hi.json @@ -376,11 +376,11 @@ "upload_documents": { "title": "दस्तावेज़ अपलोड करें", "subtitle": "AI-संचालित बातचीत के माध्यम से अपनी फ़ाइलों को खोजने योग्य और सुलभ बनाने के लिए अपलोड करें।", - "file_size_limit": "अधिकतम फ़ाइल आकार: प्रति फ़ाइल {maxMB}MB", - "upload_limits": "फ़ाइलें या पूरे फ़ोल्डर अपलोड करें", + "file_size_limit": "अधिकतम फ़ाइल आकार: प्रति फ़ाइल 50MB", + "upload_limits": "अपलोड सीमा: {maxFiles} फ़ाइलें, कुल {maxSizeMB}MB", "drop_files": "फ़ाइलें या फ़ोल्डर यहां छोड़ें", "drag_drop": "फ़ाइलें या फ़ोल्डर यहां खींचें और छोड़ें", - "or_browse": "या फ़ाइलें और फ़ोल्डर ब्राउज़ करने के लिए क्लिक करें", + "or_browse": "या ब्राउज़ करने के लिए क्लिक करें", "browse_files": "फ़ाइलें ब्राउज़ करें", "browse_folder": "फ़ोल्डर ब्राउज़ करें", "selected_files": "चयनित फ़ाइलें ({count})", @@ -397,7 +397,14 @@ "file_types_desc": "ये फ़ाइल प्रकार आपकी वर्तमान ETL सेवा कॉन्फ़िगरेशन के आधार पर समर्थित हैं।", "file_too_large": "फ़ाइल बहुत बड़ी है", "file_too_large_desc": "\"{name}\" प्रति फ़ाइल {maxMB}MB की सीमा से अधिक है।", - "no_supported_files_in_folder": "चयनित फ़ोल्डर में कोई समर्थित फ़ाइल प्रकार नहीं मिला।" + "no_supported_files_in_folder": "चयनित फ़ोल्डर में कोई समर्थित फ़ाइल प्रकार नहीं मिला।", + "remaining_capacity": "{files} फ़ाइलें, {sizeMB}MB शेष", + "file_limit_reached": "फ़ाइल सीमा पूरी हो गई", + "file_limit_reached_desc": "अधिकतम {max} फ़ाइलें अनुमत हैं", + "max_files_exceeded": "बहुत सारी फ़ाइलें", + "max_files_exceeded_desc": "आप एक बार में अधिकतम {max} फ़ाइलें अपलोड कर सकते हैं", + "max_size_exceeded": "कुल आकार सीमा पार", + "max_size_exceeded_desc": "कुल अपलोड आकार {max}MB से अधिक नहीं हो सकता" }, "add_webpage": { "title": "क्रॉलिंग के लिए वेबपेज जोड़ें", diff --git a/surfsense_web/messages/pt.json b/surfsense_web/messages/pt.json index 9aec7af48..eeb417a27 100644 --- a/surfsense_web/messages/pt.json +++ b/surfsense_web/messages/pt.json @@ -376,11 +376,11 @@ "upload_documents": { "title": "Enviar documentos", "subtitle": "Envie seus arquivos para torná-los pesquisáveis e acessíveis através de conversas com IA.", - "file_size_limit": "Tamanho máximo do arquivo: {maxMB} MB por arquivo", - "upload_limits": "Envie arquivos ou pastas inteiras", + "file_size_limit": "Tamanho máximo do arquivo: 50 MB por arquivo", + "upload_limits": "Limite de envio: {maxFiles} arquivos, {maxSizeMB} MB no total", "drop_files": "Solte arquivos ou pastas aqui", "drag_drop": "Arraste e solte arquivos ou pastas aqui", - "or_browse": "ou clique para navegar arquivos e pastas", + "or_browse": "ou clique para navegar", "browse_files": "Navegar arquivos", "browse_folder": "Navegar pasta", "selected_files": "Arquivos selecionados ({count})", @@ -397,7 +397,14 @@ "file_types_desc": "Estes tipos de arquivo são suportados com base na configuração atual do seu serviço ETL.", "file_too_large": "Arquivo muito grande", "file_too_large_desc": "\"{name}\" excede o limite de {maxMB} MB por arquivo.", - "no_supported_files_in_folder": "Nenhum tipo de arquivo suportado encontrado na pasta selecionada." + "no_supported_files_in_folder": "Nenhum tipo de arquivo suportado encontrado na pasta selecionada.", + "remaining_capacity": "{files} arquivos, {sizeMB}MB restante", + "file_limit_reached": "Limite de arquivos atingido", + "file_limit_reached_desc": "Máximo de {max} arquivos permitidos", + "max_files_exceeded": "Muitos arquivos", + "max_files_exceeded_desc": "Você pode enviar no máximo {max} arquivos de uma vez", + "max_size_exceeded": "Tamanho total excedido", + "max_size_exceeded_desc": "O tamanho total do envio não pode exceder {max}MB" }, "add_webpage": { "title": "Adicionar páginas web para rastreamento", diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json index 3ceab2443..2ee18a346 100644 --- a/surfsense_web/messages/zh.json +++ b/surfsense_web/messages/zh.json @@ -360,11 +360,11 @@ "upload_documents": { "title": "上传文档", "subtitle": "上传您的文件,使其可通过 AI 对话进行搜索和访问。", - "file_size_limit": "最大文件大小:每个文件 {maxMB}MB", - "upload_limits": "上传文件或整个文件夹", + "file_size_limit": "最大文件大小:每个文件 50MB", + "upload_limits": "上传限制:最多 {maxFiles} 个文件,总大小不超过 {maxSizeMB}MB", "drop_files": "将文件或文件夹拖放到此处", - "drag_drop": "将文件或文件夹拖放到此处", - "or_browse": "或点击浏览文件和文件夹", + "drag_drop": "拖放文件或文件夹到这里", + "or_browse": "或点击浏览", "browse_files": "浏览文件", "browse_folder": "浏览文件夹", "selected_files": "已选择的文件 ({count})", @@ -381,7 +381,14 @@ "file_types_desc": "根据您当前的 ETL 服务配置支持这些文件类型。", "file_too_large": "文件过大", "file_too_large_desc": "\"{name}\" 超过了每个文件 {maxMB}MB 的限制。", - "no_supported_files_in_folder": "所选文件夹中没有找到支持的文件类型。" + "no_supported_files_in_folder": "所选文件夹中没有找到支持的文件类型。", + "remaining_capacity": "剩余 {files} 个文件,{sizeMB}MB", + "file_limit_reached": "已达文件数量上限", + "file_limit_reached_desc": "最多允许 {max} 个文件", + "max_files_exceeded": "文件数量过多", + "max_files_exceeded_desc": "一次最多上传 {max} 个文件", + "max_size_exceeded": "总大小超出限制", + "max_size_exceeded_desc": "总上传大小不能超过 {max}MB" }, "add_webpage": { "title": "添加网页爬取", From 8171605fae6737b4e95cc07d05fe5467493265dd Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 13:05:42 +0530 Subject: [PATCH 50/61] refactor: remove metadata viewing functionality from FolderNode, FolderTreeView, and DocumentsSidebar components --- .../components/documents/FolderNode.tsx | 10 +----- .../components/documents/FolderTreeView.tsx | 3 -- .../layout/ui/sidebar/DocumentsSidebar.tsx | 34 ------------------- 3 files changed, 1 insertion(+), 46 deletions(-) diff --git a/surfsense_web/components/documents/FolderNode.tsx b/surfsense_web/components/documents/FolderNode.tsx index 41c1d8f73..909f965f9 100644 --- a/surfsense_web/components/documents/FolderNode.tsx +++ b/surfsense_web/components/documents/FolderNode.tsx @@ -80,7 +80,6 @@ interface FolderNodeProps { isWatched?: boolean; onRescan?: (folder: FolderDisplay) => void; onStopWatching?: (folder: FolderDisplay) => void; - onViewMetadata?: (folder: FolderDisplay) => void; } function getDropZone( @@ -122,7 +121,6 @@ export const FolderNode = React.memo(function FolderNode({ isWatched, onRescan, onStopWatching, - onViewMetadata, }: FolderNodeProps) { const [renameValue, setRenameValue] = useState(folder.name); const inputRef = useRef(null); @@ -258,13 +256,7 @@ export const FolderNode = React.memo(function FolderNode({ isOver && !canDrop && "cursor-not-allowed" )} style={{ paddingLeft: `${depth * 16 + 4}px` }} - onClick={(e) => { - if ((e.ctrlKey || e.metaKey) && onViewMetadata) { - e.preventDefault(); - e.stopPropagation(); - onViewMetadata(folder); - return; - } + onClick={() => { onToggleExpand(folder.id); }} onKeyDown={(e) => { diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index 6f64d6258..3aa8ce9d8 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -44,7 +44,6 @@ interface FolderTreeViewProps { watchedFolderIds?: Set; onRescanFolder?: (folder: FolderDisplay) => void; onStopWatchingFolder?: (folder: FolderDisplay) => void; - onViewFolderMetadata?: (folder: FolderDisplay) => void; } function groupBy(items: T[], keyFn: (item: T) => string | number): Record { @@ -82,7 +81,6 @@ export function FolderTreeView({ watchedFolderIds, onRescanFolder, onStopWatchingFolder, - onViewFolderMetadata, }: FolderTreeViewProps) { const foldersByParent = useMemo(() => groupBy(folders, (f) => f.parentId ?? "root"), [folders]); @@ -247,7 +245,6 @@ export function FolderTreeView({ isWatched={watchedFolderIds?.has(f.id)} onRescan={onRescanFolder} onStopWatching={onStopWatchingFolder} - onViewMetadata={onViewFolderMetadata} /> ); diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index 24f6666c9..c10c5dc82 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -22,7 +22,6 @@ import type { FolderDisplay } from "@/components/documents/FolderNode"; import { FolderPickerDialog } from "@/components/documents/FolderPickerDialog"; import { FolderTreeView } from "@/components/documents/FolderTreeView"; import { VersionHistoryDialog } from "@/components/documents/version-history"; -import { JsonMetadataViewer } from "@/components/json-metadata-viewer"; import { EXPORT_FILE_EXTENSIONS } from "@/components/shared/ExportMenuItems"; import { AlertDialog, @@ -97,10 +96,6 @@ export function DocumentsSidebar({ const [activeTypes, setActiveTypes] = useState([]); const [watchedFolderIds, setWatchedFolderIds] = useState>(new Set()); - const [metadataFolder, setMetadataFolder] = useState(null); - const [metadataJson, setMetadataJson] = useState | null>(null); - const [metadataLoading, setMetadataLoading] = useState(false); - useEffect(() => { const api = typeof window !== "undefined" ? window.electronAPI : null; if (!api?.getWatchedFolders) return; @@ -333,20 +328,6 @@ export function DocumentsSidebar({ [] ); - const handleViewFolderMetadata = useCallback(async (folder: FolderDisplay) => { - setMetadataFolder(folder); - setMetadataLoading(true); - try { - const fullFolder = await foldersApiService.getFolder(folder.id); - setMetadataJson((fullFolder.metadata as Record) ?? {}); - } catch (err) { - console.error("[DocumentsSidebar] Failed to fetch folder metadata:", err); - setMetadataJson({ error: "Failed to load folder metadata" }); - } finally { - setMetadataLoading(false); - } - }, []); - const handleRenameFolder = useCallback(async (folder: FolderDisplay, newName: string) => { try { await foldersApiService.updateFolder(folder.id, { name: newName }); @@ -836,25 +817,10 @@ export function DocumentsSidebar({ watchedFolderIds={watchedFolderIds} onRescanFolder={handleRescanFolder} onStopWatchingFolder={handleStopWatching} - onViewFolderMetadata={handleViewFolderMetadata} />
- { - if (!open) { - setMetadataFolder(null); - setMetadataJson(null); - setMetadataLoading(false); - } - }} - /> - {versionDocId !== null && ( Date: Fri, 3 Apr 2026 13:10:25 +0530 Subject: [PATCH 51/61] fix: update button alignment in InlineCitation component for consistent styling --- surfsense_web/components/assistant-ui/inline-citation.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx index 15ad11d94..42144f1d6 100644 --- a/surfsense_web/components/assistant-ui/inline-citation.tsx +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -32,8 +32,7 @@ export const InlineCitation: FC = ({ chunkId, isDocsChunk = From 746c730b2e03ae23a718d53023bf473fa673d3f7 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 13:14:40 +0530 Subject: [PATCH 52/61] chore: ran linting --- .../app/routes/documents_routes.py | 63 ++--- surfsense_backend/app/routes/editor_routes.py | 6 +- .../routes/search_source_connectors_routes.py | 1 - surfsense_backend/app/schemas/folders.py | 7 +- .../app/tasks/celery_tasks/document_tasks.py | 15 +- .../local_folder_indexer.py | 215 +++++++++++++----- .../app/utils/document_versioning.py | 6 +- .../tests/integration/conftest.py | 2 - .../test_local_folder_pipeline.py | 198 +++++++++------- .../integration/test_document_versioning.py | 55 ++--- .../test_local_folder_scan.py | 4 +- .../app/(home)/login/LocalLoginForm.tsx | 20 +- surfsense_web/app/(home)/register/page.tsx | 131 +++++------ .../(manage)/components/DocumentsFilters.tsx | 22 +- .../components/PromptsContent.tsx | 7 +- .../assistant-ui/connector-popup.tsx | 5 +- .../views/connector-edit-view.tsx | 58 ++--- .../assistant-ui/document-upload-popup.tsx | 5 +- .../assistant-ui/inline-citation.tsx | 3 +- .../components/documents/DocumentNode.tsx | 17 +- .../components/documents/FolderNode.tsx | 186 +++++++-------- .../components/documents/FolderTreeView.tsx | 8 +- .../components/documents/version-history.tsx | 42 ++-- .../components/editor-panel/editor-panel.tsx | 39 ++-- .../layout/ui/sidebar/DocumentsSidebar.tsx | 185 ++++++++------- .../layout/ui/tabs/DocumentTabContent.tsx | 4 +- .../new-chat/source-detail-panel.tsx | 4 +- .../components/settings/llm-role-manager.tsx | 41 ++-- .../components/sources/DocumentUploadTab.tsx | 73 +++--- .../lib/apis/connectors-api.service.ts | 1 - .../lib/apis/documents-api.service.ts | 38 +++- 31 files changed, 801 insertions(+), 660 deletions(-) diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index c86cdab3f..5008b1a10 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -977,15 +977,19 @@ async def get_watched_folders( ) folders = ( - await session.execute( - select(Folder).where( - Folder.search_space_id == search_space_id, - Folder.parent_id.is_(None), - Folder.folder_metadata.isnot(None), - Folder.folder_metadata["watched"].astext == "true", + ( + await session.execute( + select(Folder).where( + Folder.search_space_id == search_space_id, + Folder.parent_id.is_(None), + Folder.folder_metadata.isnot(None), + Folder.folder_metadata["watched"].astext == "true", + ) ) ) - ).scalars().all() + .scalars() + .all() + ) return folders @@ -1265,15 +1269,21 @@ async def list_document_versions( if not document: raise HTTPException(status_code=404, detail="Document not found") - await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_READ.value) + await check_permission( + session, user, document.search_space_id, Permission.DOCUMENTS_READ.value + ) versions = ( - await session.execute( - select(DocumentVersion) - .where(DocumentVersion.document_id == document_id) - .order_by(DocumentVersion.version_number.desc()) + ( + await session.execute( + select(DocumentVersion) + .where(DocumentVersion.document_id == document_id) + .order_by(DocumentVersion.version_number.desc()) + ) ) - ).scalars().all() + .scalars() + .all() + ) return [ { @@ -1300,7 +1310,9 @@ async def get_document_version( if not document: raise HTTPException(status_code=404, detail="Document not found") - await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_READ.value) + await check_permission( + session, user, document.search_space_id, Permission.DOCUMENTS_READ.value + ) version = ( await session.execute( @@ -1331,14 +1343,14 @@ async def restore_document_version( ): """Restore a previous version: snapshot current state, then overwrite document content.""" document = ( - await session.execute( - select(Document).where(Document.id == document_id) - ) + await session.execute(select(Document).where(Document.id == document_id)) ).scalar_one_or_none() if not document: raise HTTPException(status_code=404, detail="Document not found") - await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_UPDATE.value) + await check_permission( + session, user, document.search_space_id, Permission.DOCUMENTS_UPDATE.value + ) version = ( await session.execute( @@ -1363,6 +1375,7 @@ async def restore_document_version( await session.commit() from app.tasks.celery_tasks.document_reindex_tasks import reindex_document_task + reindex_document_task.delay(document_id, str(user.id)) return { @@ -1430,9 +1443,7 @@ async def folder_index( root_folder_id = request.root_folder_id if root_folder_id: existing = ( - await session.execute( - select(Folder).where(Folder.id == root_folder_id) - ) + await session.execute(select(Folder).where(Folder.id == root_folder_id)) ).scalar_one_or_none() if not existing: root_folder_id = None @@ -1492,7 +1503,9 @@ async def folder_index_files( ) if not request.target_file_paths: - raise HTTPException(status_code=400, detail="target_file_paths must not be empty") + raise HTTPException( + status_code=400, detail="target_file_paths must not be empty" + ) await check_permission( session, @@ -1507,11 +1520,11 @@ async def folder_index_files( for fp in request.target_file_paths: try: Path(fp).relative_to(request.folder_path) - except ValueError: + except ValueError as err: raise HTTPException( status_code=400, detail=f"target_file_path {fp} must be inside folder_path", - ) + ) from err from app.tasks.celery_tasks.document_tasks import index_local_folder_task @@ -1530,5 +1543,3 @@ async def folder_index_files( "status": "processing", "file_count": len(request.target_file_paths), } - - diff --git a/surfsense_backend/app/routes/editor_routes.py b/surfsense_backend/app/routes/editor_routes.py index a0505f62f..829b2cf69 100644 --- a/surfsense_backend/app/routes/editor_routes.py +++ b/surfsense_backend/app/routes/editor_routes.py @@ -129,7 +129,11 @@ async def get_editor_content( if not chunk_contents: doc_status = document.status or {} - state = doc_status.get("state", "ready") if isinstance(doc_status, dict) else "ready" + state = ( + doc_status.get("state", "ready") + if isinstance(doc_status, dict) + else "ready" + ) if state in ("pending", "processing"): raise HTTPException( status_code=409, diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index f49ba2d5d..d208ff910 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -20,7 +20,6 @@ Non-OAuth connectors (BookStack, GitHub, etc.) are limited to one per search spa import asyncio import logging -import os from contextlib import suppress from datetime import UTC, datetime, timedelta from typing import Any diff --git a/surfsense_backend/app/schemas/folders.py b/surfsense_backend/app/schemas/folders.py index e8bdf3821..a7e065144 100644 --- a/surfsense_backend/app/schemas/folders.py +++ b/surfsense_backend/app/schemas/folders.py @@ -1,9 +1,8 @@ """Pydantic schemas for folder CRUD, move, and reorder operations.""" from datetime import datetime -from uuid import UUID - from typing import Any +from uuid import UUID from pydantic import BaseModel, ConfigDict, Field @@ -36,7 +35,9 @@ class FolderRead(BaseModel): created_by_id: UUID | None created_at: datetime updated_at: datetime - metadata: dict[str, Any] | None = Field(default=None, validation_alias="folder_metadata") + metadata: dict[str, Any] | None = Field( + default=None, validation_alias="folder_metadata" + ) model_config = ConfigDict(from_attributes=True) diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py index 506f8118c..4e9249d34 100644 --- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py @@ -1,6 +1,7 @@ """Celery tasks for document processing.""" import asyncio +import contextlib import logging import os from uuid import UUID @@ -1337,9 +1338,7 @@ async def _index_local_folder_async( ) notification_id = notification.id _start_heartbeat(notification_id) - heartbeat_task = asyncio.create_task( - _run_heartbeat_loop(notification_id) - ) + heartbeat_task = asyncio.create_task(_run_heartbeat_loop(notification_id)) except Exception: logger.warning( "Failed to create notification for local folder indexing", @@ -1349,18 +1348,16 @@ async def _index_local_folder_async( async def _heartbeat_progress(completed_count: int) -> None: """Refresh heartbeat and optionally update notification progress.""" if notification: - try: + with contextlib.suppress(Exception): await NotificationService.document_processing.notify_processing_progress( session=session, notification=notification, stage="indexing", stage_message=f"Syncing files ({completed_count}/{file_count or '?'})", ) - except Exception: - pass try: - indexed, skipped_or_failed, _rfid, err = await index_local_folder( + _indexed, _skipped_or_failed, _rfid, err = await index_local_folder( session=session, search_space_id=search_space_id, user_id=user_id, @@ -1371,7 +1368,9 @@ async def _index_local_folder_async( root_folder_id=root_folder_id, enable_summary=enable_summary, target_file_paths=target_file_paths, - on_heartbeat_callback=_heartbeat_progress if (is_batch or is_full_scan) else None, + on_heartbeat_callback=_heartbeat_progress + if (is_batch or is_full_scan) + else None, ) if notification: diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 4ac8cc594..539cfdd32 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -43,30 +43,110 @@ from .base import ( logger, ) -PLAINTEXT_EXTENSIONS = frozenset({ - ".md", ".markdown", ".txt", ".text", ".csv", ".tsv", - ".json", ".jsonl", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf", - ".xml", ".html", ".htm", ".css", ".scss", ".less", ".sass", - ".py", ".pyw", ".pyi", ".pyx", - ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", - ".java", ".kt", ".kts", ".scala", ".groovy", - ".c", ".h", ".cpp", ".cxx", ".cc", ".hpp", ".hxx", - ".cs", ".fs", ".fsx", - ".go", ".rs", ".rb", ".php", ".pl", ".pm", ".lua", - ".swift", ".m", ".mm", - ".r", ".R", ".jl", - ".sh", ".bash", ".zsh", ".fish", ".bat", ".cmd", ".ps1", - ".sql", ".graphql", ".gql", - ".env", ".gitignore", ".dockerignore", ".editorconfig", - ".makefile", ".cmake", - ".log", ".rst", ".tex", ".bib", ".org", ".adoc", ".asciidoc", - ".vue", ".svelte", ".astro", - ".tf", ".hcl", ".proto", -}) +PLAINTEXT_EXTENSIONS = frozenset( + { + ".md", + ".markdown", + ".txt", + ".text", + ".csv", + ".tsv", + ".json", + ".jsonl", + ".yaml", + ".yml", + ".toml", + ".ini", + ".cfg", + ".conf", + ".xml", + ".html", + ".htm", + ".css", + ".scss", + ".less", + ".sass", + ".py", + ".pyw", + ".pyi", + ".pyx", + ".js", + ".jsx", + ".ts", + ".tsx", + ".mjs", + ".cjs", + ".java", + ".kt", + ".kts", + ".scala", + ".groovy", + ".c", + ".h", + ".cpp", + ".cxx", + ".cc", + ".hpp", + ".hxx", + ".cs", + ".fs", + ".fsx", + ".go", + ".rs", + ".rb", + ".php", + ".pl", + ".pm", + ".lua", + ".swift", + ".m", + ".mm", + ".r", + ".R", + ".jl", + ".sh", + ".bash", + ".zsh", + ".fish", + ".bat", + ".cmd", + ".ps1", + ".sql", + ".graphql", + ".gql", + ".env", + ".gitignore", + ".dockerignore", + ".editorconfig", + ".makefile", + ".cmake", + ".log", + ".rst", + ".tex", + ".bib", + ".org", + ".adoc", + ".asciidoc", + ".vue", + ".svelte", + ".astro", + ".tf", + ".hcl", + ".proto", + } +) -AUDIO_EXTENSIONS = frozenset({ - ".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", -}) +AUDIO_EXTENSIONS = frozenset( + { + ".mp3", + ".mp4", + ".mpeg", + ".mpga", + ".m4a", + ".wav", + ".webm", + } +) def _is_plaintext_file(filename: str) -> bool: @@ -81,6 +161,7 @@ def _needs_etl(filename: str) -> bool: """File is not plaintext and not audio — requires ETL service to parse.""" return not _is_plaintext_file(filename) and not _is_audio_file(filename) + HeartbeatCallbackType = Callable[[int], Awaitable[None]] DEFAULT_EXCLUDE_PATTERNS = [ @@ -121,9 +202,7 @@ def scan_folder( for dirpath, dirnames, filenames in os.walk(root): rel_dir = Path(dirpath).relative_to(root) - dirnames[:] = [ - d for d in dirnames if d not in exclude_patterns - ] + dirnames[:] = [d for d in dirnames if d not in exclude_patterns] if any(part in exclude_patterns for part in rel_dir.parts): continue @@ -134,9 +213,11 @@ def scan_folder( full = Path(dirpath) / fname - if file_extensions is not None: - if full.suffix.lower() not in file_extensions: - continue + if ( + file_extensions is not None + and full.suffix.lower() not in file_extensions + ): + continue try: stat = full.stat() @@ -209,11 +290,14 @@ def _content_hash(content: str, search_space_id: int) -> str: pipeline so that dedup checks are consistent. """ import hashlib - return hashlib.sha256(f"{search_space_id}:{content}".encode("utf-8")).hexdigest() + + return hashlib.sha256(f"{search_space_id}:{content}".encode()).hexdigest() async def _compute_file_content_hash( - file_path: str, filename: str, search_space_id: int, + file_path: str, + filename: str, + search_space_id: int, ) -> tuple[str, str]: """Read a file (via ETL if needed) and compute its content hash. @@ -257,9 +341,7 @@ async def _mirror_folder_structure( if root_folder_id: existing = ( - await session.execute( - select(Folder).where(Folder.id == root_folder_id) - ) + await session.execute(select(Folder).where(Folder.id == root_folder_id)) ).scalar_one_or_none() if existing: mapping[""] = existing.id @@ -412,13 +494,17 @@ async def _cleanup_empty_folders( id_to_rel: dict[int, str] = {fid: rel for rel, fid in folder_mapping.items() if rel} all_folders = ( - await session.execute( - select(Folder).where( - Folder.search_space_id == search_space_id, - Folder.id != root_folder_id, + ( + await session.execute( + select(Folder).where( + Folder.search_space_id == search_space_id, + Folder.id != root_folder_id, + ) ) ) - ).scalars().all() + .scalars() + .all() + ) candidates: list[Folder] = [] for folder in all_folders: @@ -520,7 +606,9 @@ async def index_local_folder( metadata={ "folder_path": folder_path, "user_id": str(user_id), - "target_file_paths_count": len(target_file_paths) if target_file_paths else None, + "target_file_paths_count": len(target_file_paths) + if target_file_paths + else None, }, ) @@ -532,7 +620,12 @@ async def index_local_folder( "Folder not found", {}, ) - return 0, 0, root_folder_id, f"Folder path missing or does not exist: {folder_path}" + return ( + 0, + 0, + root_folder_id, + f"Folder path missing or does not exist: {folder_path}", + ) if exclude_patterns is None: exclude_patterns = DEFAULT_EXCLUDE_PATTERNS @@ -639,7 +732,9 @@ async def index_local_folder( ) if existing_document: - stored_mtime = (existing_document.document_metadata or {}).get("mtime") + stored_mtime = (existing_document.document_metadata or {}).get( + "mtime" + ) current_mtime = file_info["modified_at"].timestamp() if stored_mtime and abs(current_mtime - stored_mtime) < 1.0: @@ -709,23 +804,31 @@ async def index_local_folder( # ================================================================ all_root_folder_ids = set(folder_mapping.values()) all_db_folders = ( - await session.execute( - select(Folder.id).where( - Folder.search_space_id == search_space_id, + ( + await session.execute( + select(Folder.id).where( + Folder.search_space_id == search_space_id, + ) ) ) - ).scalars().all() + .scalars() + .all() + ) all_root_folder_ids.update(all_db_folders) all_folder_docs = ( - await session.execute( - select(Document).where( - Document.document_type == DocumentType.LOCAL_FOLDER_FILE, - Document.search_space_id == search_space_id, - Document.folder_id.in_(list(all_root_folder_ids)), + ( + await session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == search_space_id, + Document.folder_id.in_(list(all_root_folder_ids)), + ) ) ) - ).scalars().all() + .scalars() + .all() + ) for doc in all_folder_docs: if doc.unique_identifier_hash not in seen_unique_hashes: @@ -742,9 +845,7 @@ async def index_local_folder( ) pipeline = IndexingPipelineService(session) - doc_map = { - compute_unique_identifier_hash(cd): cd for cd in connector_docs - } + doc_map = {compute_unique_identifier_hash(cd): cd for cd in connector_docs} documents = await pipeline.prepare_for_indexing(connector_docs) # Assign folder_id immediately so docs appear in the correct @@ -1033,7 +1134,9 @@ async def _index_single_file( db_doc.document_metadata = doc_meta await session.commit() - indexed = 1 if DocumentStatus.is_state(db_doc.status, DocumentStatus.READY) else 0 + indexed = ( + 1 if DocumentStatus.is_state(db_doc.status, DocumentStatus.READY) else 0 + ) failed_msg = None if indexed else "Indexing failed" if indexed: diff --git a/surfsense_backend/app/utils/document_versioning.py b/surfsense_backend/app/utils/document_versioning.py index 889bc4a3a..e6ad1fb06 100644 --- a/surfsense_backend/app/utils/document_versioning.py +++ b/surfsense_backend/app/utils/document_versioning.py @@ -83,9 +83,9 @@ async def create_version_snapshot( # Cleanup: cap at MAX_VERSIONS_PER_DOCUMENT count = ( await session.execute( - select(func.count()).select_from(DocumentVersion).where( - DocumentVersion.document_id == document.id - ) + select(func.count()) + .select_from(DocumentVersion) + .where(DocumentVersion.document_id == document.id) ) ).scalar_one() diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py index 9c91011ae..d9d7cacae 100644 --- a/surfsense_backend/tests/integration/conftest.py +++ b/surfsense_backend/tests/integration/conftest.py @@ -166,5 +166,3 @@ def make_connector_document(db_connector, db_user): return ConnectorDocument(**defaults) return _make - - diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py index 67254ec93..4062c3a3b 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py @@ -21,7 +21,9 @@ from app.db import ( pytestmark = pytest.mark.integration UNIFIED_FIXTURES = ( - "patched_summarize", "patched_embed_texts", "patched_chunk_text", + "patched_summarize", + "patched_embed_texts", + "patched_chunk_text", ) @@ -37,6 +39,7 @@ class _FakeSessionMaker: @asynccontextmanager async def _ctx(): yield self._session + return _ctx() @@ -59,7 +62,6 @@ def patched_batch_sessions(monkeypatch, db_session): class TestFullIndexer: - @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_i1_new_file_indexed( self, @@ -73,7 +75,7 @@ class TestFullIndexer: (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") - count, skipped, root_folder_id, err = await index_local_folder( + count, _skipped, _root_folder_id, err = await index_local_folder( session=db_session, search_space_id=db_search_space.id, user_id=str(db_user.id), @@ -85,13 +87,17 @@ class TestFullIndexer: assert count == 1 docs = ( - await db_session.execute( - select(Document).where( - Document.document_type == DocumentType.LOCAL_FOLDER_FILE, - Document.search_space_id == db_search_space.id, + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) ) ) - ).scalars().all() + .scalars() + .all() + ) assert len(docs) == 1 assert docs[0].document_type == DocumentType.LOCAL_FOLDER_FILE assert DocumentStatus.is_state(docs[0].status, DocumentStatus.READY) @@ -130,7 +136,9 @@ class TestFullIndexer: total = ( await db_session.execute( - select(func.count()).select_from(Document).where( + select(func.count()) + .select_from(Document) + .where( Document.document_type == DocumentType.LOCAL_FOLDER_FILE, Document.search_space_id == db_search_space.id, ) @@ -174,13 +182,19 @@ class TestFullIndexer: assert count == 1 versions = ( - await db_session.execute( - select(DocumentVersion).join(Document).where( - Document.document_type == DocumentType.LOCAL_FOLDER_FILE, - Document.search_space_id == db_search_space.id, + ( + await db_session.execute( + select(DocumentVersion) + .join(Document) + .where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) ) ) - ).scalars().all() + .scalars() + .all() + ) assert len(versions) >= 1 @pytest.mark.usefixtures(*UNIFIED_FIXTURES) @@ -207,7 +221,9 @@ class TestFullIndexer: docs_before = ( await db_session.execute( - select(func.count()).select_from(Document).where( + select(func.count()) + .select_from(Document) + .where( Document.document_type == DocumentType.LOCAL_FOLDER_FILE, Document.search_space_id == db_search_space.id, ) @@ -228,7 +244,9 @@ class TestFullIndexer: docs_after = ( await db_session.execute( - select(func.count()).select_from(Document).where( + select(func.count()) + .select_from(Document) + .where( Document.document_type == DocumentType.LOCAL_FOLDER_FILE, Document.search_space_id == db_search_space.id, ) @@ -262,13 +280,17 @@ class TestFullIndexer: assert count == 1 docs = ( - await db_session.execute( - select(Document).where( - Document.document_type == DocumentType.LOCAL_FOLDER_FILE, - Document.search_space_id == db_search_space.id, + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) ) ) - ).scalars().all() + .scalars() + .all() + ) assert len(docs) == 1 assert docs[0].title == "b.md" @@ -279,7 +301,6 @@ class TestFullIndexer: class TestFolderMirroring: - @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_f1_root_folder_created( self, @@ -335,10 +356,14 @@ class TestFolderMirroring: ) folders = ( - await db_session.execute( - select(Folder).where(Folder.search_space_id == db_search_space.id) + ( + await db_session.execute( + select(Folder).where(Folder.search_space_id == db_search_space.id) + ) ) - ).scalars().all() + .scalars() + .all() + ) folder_names = {f.name for f in folders} assert "notes" in folder_names @@ -376,10 +401,14 @@ class TestFolderMirroring: ) folders_before = ( - await db_session.execute( - select(Folder).where(Folder.search_space_id == db_search_space.id) + ( + await db_session.execute( + select(Folder).where(Folder.search_space_id == db_search_space.id) + ) ) - ).scalars().all() + .scalars() + .all() + ) ids_before = {f.id for f in folders_before} await index_local_folder( @@ -392,10 +421,14 @@ class TestFolderMirroring: ) folders_after = ( - await db_session.execute( - select(Folder).where(Folder.search_space_id == db_search_space.id) + ( + await db_session.execute( + select(Folder).where(Folder.search_space_id == db_search_space.id) + ) ) - ).scalars().all() + .scalars() + .all() + ) ids_after = {f.id for f in folders_after} assert ids_before == ids_after @@ -425,21 +458,23 @@ class TestFolderMirroring: ) docs = ( - await db_session.execute( - select(Document).where( - Document.document_type == DocumentType.LOCAL_FOLDER_FILE, - Document.search_space_id == db_search_space.id, + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) ) ) - ).scalars().all() + .scalars() + .all() + ) today_doc = next(d for d in docs if d.title == "today.md") root_doc = next(d for d in docs if d.title == "root.md") daily_folder = ( - await db_session.execute( - select(Folder).where(Folder.name == "daily") - ) + await db_session.execute(select(Folder).where(Folder.name == "daily")) ).scalar_one() assert today_doc.folder_id == daily_folder.id @@ -455,9 +490,10 @@ class TestFolderMirroring: tmp_path: Path, ): """F5: Deleted dir's empty Folder row is cleaned up on re-sync.""" - from app.tasks.connector_indexers.local_folder_indexer import index_local_folder import shutil + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + daily = tmp_path / "notes" / "daily" daily.mkdir(parents=True) weekly = tmp_path / "notes" / "weekly" @@ -474,9 +510,7 @@ class TestFolderMirroring: ) weekly_folder = ( - await db_session.execute( - select(Folder).where(Folder.name == "weekly") - ) + await db_session.execute(select(Folder).where(Folder.name == "weekly")) ).scalar_one_or_none() assert weekly_folder is not None @@ -492,16 +526,12 @@ class TestFolderMirroring: ) weekly_after = ( - await db_session.execute( - select(Folder).where(Folder.name == "weekly") - ) + await db_session.execute(select(Folder).where(Folder.name == "weekly")) ).scalar_one_or_none() assert weekly_after is None daily_after = ( - await db_session.execute( - select(Folder).where(Folder.name == "daily") - ) + await db_session.execute(select(Folder).where(Folder.name == "daily")) ).scalar_one_or_none() assert daily_after is not None @@ -551,18 +581,14 @@ class TestFolderMirroring: ).scalar_one() daily_folder = ( - await db_session.execute( - select(Folder).where(Folder.name == "daily") - ) + await db_session.execute(select(Folder).where(Folder.name == "daily")) ).scalar_one() assert doc.folder_id == daily_folder.id assert daily_folder.parent_id is not None notes_folder = ( - await db_session.execute( - select(Folder).where(Folder.name == "notes") - ) + await db_session.execute(select(Folder).where(Folder.name == "notes")) ).scalar_one() assert daily_folder.parent_id == notes_folder.id assert notes_folder.parent_id == root_folder_id @@ -592,9 +618,7 @@ class TestFolderMirroring: ) eph_folder = ( - await db_session.execute( - select(Folder).where(Folder.name == "ephemeral") - ) + await db_session.execute(select(Folder).where(Folder.name == "ephemeral")) ).scalar_one_or_none() assert eph_folder is not None @@ -612,16 +636,12 @@ class TestFolderMirroring: ) eph_after = ( - await db_session.execute( - select(Folder).where(Folder.name == "ephemeral") - ) + await db_session.execute(select(Folder).where(Folder.name == "ephemeral")) ).scalar_one_or_none() assert eph_after is None notes_after = ( - await db_session.execute( - select(Folder).where(Folder.name == "notes") - ) + await db_session.execute(select(Folder).where(Folder.name == "notes")) ).scalar_one_or_none() assert notes_after is None @@ -632,7 +652,6 @@ class TestFolderMirroring: class TestBatchMode: - @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_b1_batch_indexes_multiple_files( self, @@ -649,7 +668,7 @@ class TestBatchMode: (tmp_path / "b.md").write_text("File B content") (tmp_path / "c.md").write_text("File C content") - count, failed, root_folder_id, err = await index_local_folder( + count, failed, _root_folder_id, err = await index_local_folder( session=db_session, search_space_id=db_search_space.id, user_id=str(db_user.id), @@ -667,13 +686,17 @@ class TestBatchMode: assert err is None docs = ( - await db_session.execute( - select(Document).where( - Document.document_type == DocumentType.LOCAL_FOLDER_FILE, - Document.search_space_id == db_search_space.id, + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) ) ) - ).scalars().all() + .scalars() + .all() + ) assert len(docs) == 3 assert {d.title for d in docs} == {"a.md", "b.md", "c.md"} assert all( @@ -714,13 +737,17 @@ class TestBatchMode: assert err is not None docs = ( - await db_session.execute( - select(Document).where( - Document.document_type == DocumentType.LOCAL_FOLDER_FILE, - Document.search_space_id == db_search_space.id, + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) ) ) - ).scalars().all() + .scalars() + .all() + ) assert len(docs) == 2 assert {d.title for d in docs} == {"good1.md", "good2.md"} @@ -731,7 +758,6 @@ class TestBatchMode: class TestPipelineIntegration: - @pytest.mark.usefixtures(*UNIFIED_FIXTURES) async def test_p1_local_folder_file_through_pipeline( self, @@ -742,7 +768,9 @@ class TestPipelineIntegration: ): """P1: LOCAL_FOLDER_FILE ConnectorDocument through prepare+index to READY.""" from app.indexing_pipeline.connector_document import ConnectorDocument - from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + from app.indexing_pipeline.indexing_pipeline_service import ( + IndexingPipelineService, + ) doc = ConnectorDocument( title="Test Local File", @@ -763,12 +791,16 @@ class TestPipelineIntegration: assert result is not None docs = ( - await db_session.execute( - select(Document).where( - Document.document_type == DocumentType.LOCAL_FOLDER_FILE, - Document.search_space_id == db_search_space.id, + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) ) ) - ).scalars().all() + .scalars() + .all() + ) assert len(docs) == 1 assert DocumentStatus.is_state(docs[0].status, DocumentStatus.READY) diff --git a/surfsense_backend/tests/integration/test_document_versioning.py b/surfsense_backend/tests/integration/test_document_versioning.py index 87e3c490c..9bd03d219 100644 --- a/surfsense_backend/tests/integration/test_document_versioning.py +++ b/surfsense_backend/tests/integration/test_document_versioning.py @@ -34,14 +34,16 @@ async def db_document( async def _version_count(session: AsyncSession, document_id: int) -> int: result = await session.execute( - select(func.count()).select_from(DocumentVersion).where( - DocumentVersion.document_id == document_id - ) + select(func.count()) + .select_from(DocumentVersion) + .where(DocumentVersion.document_id == document_id) ) return result.scalar_one() -async def _get_versions(session: AsyncSession, document_id: int) -> list[DocumentVersion]: +async def _get_versions( + session: AsyncSession, document_id: int +) -> list[DocumentVersion]: result = await session.execute( select(DocumentVersion) .where(DocumentVersion.document_id == document_id) @@ -74,18 +76,14 @@ class TestCreateVersionSnapshot: from app.utils.document_versioning import create_version_snapshot t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC) - monkeypatch.setattr( - "app.utils.document_versioning._now", lambda: t0 - ) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: t0) await create_version_snapshot(db_session, db_document) # Simulate content change and time passing db_document.source_markdown = "# Test\n\nUpdated content." db_document.content_hash = "def456" t1 = t0 + timedelta(minutes=31) - monkeypatch.setattr( - "app.utils.document_versioning._now", lambda: t1 - ) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: t1) await create_version_snapshot(db_session, db_document) versions = await _get_versions(db_session, db_document.id) @@ -101,9 +99,7 @@ class TestCreateVersionSnapshot: from app.utils.document_versioning import create_version_snapshot t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC) - monkeypatch.setattr( - "app.utils.document_versioning._now", lambda: t0 - ) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: t0) await create_version_snapshot(db_session, db_document) count_after_first = await _version_count(db_session, db_document.id) assert count_after_first == 1 @@ -112,9 +108,7 @@ class TestCreateVersionSnapshot: db_document.source_markdown = "# Test\n\nQuick edit." db_document.content_hash = "quick123" t1 = t0 + timedelta(minutes=10) - monkeypatch.setattr( - "app.utils.document_versioning._now", lambda: t1 - ) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: t1) await create_version_snapshot(db_session, db_document) count_after_second = await _version_count(db_session, db_document.id) @@ -134,22 +128,15 @@ class TestCreateVersionSnapshot: # Create 5 versions spread across time: 3 older than 90 days, 2 recent for i in range(5): - db_document.source_markdown = f"Content v{i+1}" - db_document.content_hash = f"hash_{i+1}" - if i < 3: - t = base + timedelta(days=i) # old - else: - t = base + timedelta(days=100 + i) # recent - monkeypatch.setattr( - "app.utils.document_versioning._now", lambda _t=t: _t - ) + db_document.source_markdown = f"Content v{i + 1}" + db_document.content_hash = f"hash_{i + 1}" + t = base + timedelta(days=i) if i < 3 else base + timedelta(days=100 + i) + monkeypatch.setattr("app.utils.document_versioning._now", lambda _t=t: _t) await create_version_snapshot(db_session, db_document) # Now trigger cleanup from a "current" time that makes the first 3 versions > 90 days old now = base + timedelta(days=200) - monkeypatch.setattr( - "app.utils.document_versioning._now", lambda: now - ) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: now) db_document.source_markdown = "Content v6" db_document.content_hash = "hash_6" await create_version_snapshot(db_session, db_document) @@ -160,9 +147,7 @@ class TestCreateVersionSnapshot: age = now - v.created_at.replace(tzinfo=UTC) assert age <= timedelta(days=90), f"Version {v.version_number} is too old" - async def test_v5_cap_at_20_versions( - self, db_session, db_document, monkeypatch - ): + async def test_v5_cap_at_20_versions(self, db_session, db_document, monkeypatch): """V5: More than 20 versions triggers cap — oldest gets deleted.""" from app.utils.document_versioning import create_version_snapshot @@ -170,12 +155,10 @@ class TestCreateVersionSnapshot: # Create 21 versions (all within 90 days, each 31 min apart) for i in range(21): - db_document.source_markdown = f"Content v{i+1}" - db_document.content_hash = f"hash_{i+1}" + db_document.source_markdown = f"Content v{i + 1}" + db_document.content_hash = f"hash_{i + 1}" t = base + timedelta(minutes=31 * i) - monkeypatch.setattr( - "app.utils.document_versioning._now", lambda _t=t: _t - ) + monkeypatch.setattr("app.utils.document_versioning._now", lambda _t=t: _t) await create_version_snapshot(db_session, db_document) versions = await _get_versions(db_session, db_document.id) diff --git a/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py b/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py index 9b4c73f25..c6e7b160c 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py @@ -51,9 +51,7 @@ class TestScanFolder: git.mkdir() (git / "config").write_text("gitconfig") - results = scan_folder( - str(tmp_path), exclude_patterns=["node_modules", ".git"] - ) + results = scan_folder(str(tmp_path), exclude_patterns=["node_modules", ".git"]) names = {r["relative_path"] for r in results} assert "good.md" in names diff --git a/surfsense_web/app/(home)/login/LocalLoginForm.tsx b/surfsense_web/app/(home)/login/LocalLoginForm.tsx index 1ebbf46b6..e94857334 100644 --- a/surfsense_web/app/(home)/login/LocalLoginForm.tsx +++ b/surfsense_web/app/(home)/login/LocalLoginForm.tsx @@ -160,11 +160,11 @@ export function LocalLoginForm() { placeholder="you@example.com" value={username} onChange={(e) => setUsername(e.target.value)} - className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ - error.title - ? "border-destructive focus:border-destructive focus:ring-destructive/40" - : "border-border focus:border-primary focus:ring-primary/40" - }`} + className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} disabled={isLoggingIn} />
@@ -181,11 +181,11 @@ export function LocalLoginForm() { placeholder="Enter your password" value={password} onChange={(e) => setPassword(e.target.value)} - className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ - error.title - ? "border-destructive focus:border-destructive focus:ring-destructive/40" - : "border-border focus:border-primary focus:ring-primary/40" - }`} + className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} disabled={isLoggingIn} />
-
- - setPassword(e.target.value)} - className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ - error.title - ? "border-destructive focus:border-destructive focus:ring-destructive/40" - : "border-border focus:border-primary focus:ring-primary/40" - }`} - disabled={isRegistering} - /> -
+
+ + setPassword(e.target.value)} + className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} + disabled={isRegistering} + /> +
-
- - setConfirmPassword(e.target.value)} - className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ - error.title - ? "border-destructive focus:border-destructive focus:ring-destructive/40" - : "border-border focus:border-primary focus:ring-primary/40" - }`} - disabled={isRegistering} - /> +
+ + setConfirmPassword(e.target.value)} + className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} + disabled={isRegistering} + />
+ {/* Upload Button */} +
); diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/PromptsContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/PromptsContent.tsx index 39362d244..1e7087afc 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/PromptsContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/PromptsContent.tsx @@ -2,7 +2,6 @@ import { useAtomValue } from "jotai"; import { AlertTriangle, Globe, Lock, PenLine, Sparkles, Trash2 } from "lucide-react"; -import { ShortcutKbd } from "@/components/ui/shortcut-kbd"; import { useCallback, useState } from "react"; import { toast } from "sonner"; import { @@ -24,6 +23,7 @@ import { import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; +import { ShortcutKbd } from "@/components/ui/shortcut-kbd"; import { Spinner } from "@/components/ui/spinner"; import { Switch } from "@/components/ui/switch"; import type { PromptRead } from "@/contracts/types/prompts.types"; @@ -145,9 +145,8 @@ export function PromptsContent() {

- Create prompt templates triggered with{" "} - in the - chat composer. + Create prompt templates triggered with in + the chat composer.

{!showForm && ( diff --git a/surfsense_web/components/documents/DocumentNode.tsx b/surfsense_web/components/documents/DocumentNode.tsx index 31d1bc7ca..919f904d4 100644 --- a/surfsense_web/components/documents/DocumentNode.tsx +++ b/surfsense_web/components/documents/DocumentNode.tsx @@ -39,8 +39,8 @@ import { Spinner } from "@/components/ui/spinner"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import type { DocumentTypeEnum } from "@/contracts/types/document.types"; import { cn } from "@/lib/utils"; -import { isVersionableType } from "./version-history"; import { DND_TYPES } from "./FolderNode"; +import { isVersionableType } from "./version-history"; const EDITABLE_DOCUMENT_TYPES = new Set(["FILE", "NOTE"]); @@ -199,7 +199,10 @@ export const DocumentNode = React.memo(function DocumentNode({ {doc.title} - {getDocumentTypeIcon(doc.document_type as DocumentTypeEnum, "h-3.5 w-3.5 text-muted-foreground") && ( + {getDocumentTypeIcon( + doc.document_type as DocumentTypeEnum, + "h-3.5 w-3.5 text-muted-foreground" + ) && ( {getDocumentTypeIcon( doc.document_type as DocumentTypeEnum, @@ -251,10 +254,7 @@ export const DocumentNode = React.memo(function DocumentNode({ )} {onVersionHistory && isVersionableType(doc.document_type) && ( - onVersionHistory(doc)} - > + onVersionHistory(doc)}> Versions @@ -300,10 +300,7 @@ export const DocumentNode = React.memo(function DocumentNode({ )} {onVersionHistory && isVersionableType(doc.document_type) && ( - onVersionHistory(doc)} - > + onVersionHistory(doc)}> Versions diff --git a/surfsense_web/components/documents/FolderNode.tsx b/surfsense_web/components/documents/FolderNode.tsx index 909f965f9..88cc76c69 100644 --- a/surfsense_web/components/documents/FolderNode.tsx +++ b/surfsense_web/components/documents/FolderNode.tsx @@ -256,15 +256,15 @@ export const FolderNode = React.memo(function FolderNode({ isOver && !canDrop && "cursor-not-allowed" )} style={{ paddingLeft: `${depth * 16 + 4}px` }} - onClick={() => { - onToggleExpand(folder.id); - }} - onKeyDown={(e) => { - if (e.key === "Enter" || e.key === " ") { - e.preventDefault(); + onClick={() => { onToggleExpand(folder.id); - } - }} + }} + onKeyDown={(e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + onToggleExpand(folder.id); + } + }} onDoubleClick={(e) => { e.stopPropagation(); startRename(); @@ -306,7 +306,11 @@ export const FolderNode = React.memo(function FolderNode({ ) : ( e.stopPropagation()} @@ -350,107 +354,107 @@ export const FolderNode = React.memo(function FolderNode({ - - {isWatched && onRescan && ( + + {isWatched && onRescan && ( + { + e.stopPropagation(); + onRescan(folder); + }} + > + + Re-scan + + )} + {isWatched && onStopWatching && ( + { + e.stopPropagation(); + onStopWatching(folder); + }} + > + + Stop watching + + )} { e.stopPropagation(); - onRescan(folder); + onCreateSubfolder(folder.id); }} > - - Re-scan + + New subfolder - )} - {isWatched && onStopWatching && ( { e.stopPropagation(); - onStopWatching(folder); + startRename(); }} > - - Stop watching + + Rename - )} - { - e.stopPropagation(); - onCreateSubfolder(folder.id); - }} - > - - New subfolder - - { - e.stopPropagation(); - startRename(); - }} - > - - Rename - - { - e.stopPropagation(); - onMove(folder); - }} - > - - Move to... - - { - e.stopPropagation(); - onDelete(folder); - }} - > - - Delete - - + { + e.stopPropagation(); + onMove(folder); + }} + > + + Move to... + + { + e.stopPropagation(); + onDelete(folder); + }} + > + + Delete + + )}
- {!isRenaming && contextMenuOpen && ( - - {isWatched && onRescan && ( - onRescan(folder)}> - - Re-scan + {!isRenaming && contextMenuOpen && ( + + {isWatched && onRescan && ( + onRescan(folder)}> + + Re-scan + + )} + {isWatched && onStopWatching && ( + onStopWatching(folder)}> + + Stop watching + + )} + onCreateSubfolder(folder.id)}> + + New subfolder - )} - {isWatched && onStopWatching && ( - onStopWatching(folder)}> - - Stop watching + startRename()}> + + Rename - )} - onCreateSubfolder(folder.id)}> - - New subfolder - - startRename()}> - - Rename - - onMove(folder)}> - - Move to... - - onDelete(folder)} - > - - Delete - - - )} + onMove(folder)}> + + Move to... + + onDelete(folder)} + > + + Delete + + + )} ); }); diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index 3aa8ce9d8..1df007c0b 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -242,10 +242,10 @@ export function FolderTreeView({ siblingPositions={siblingPositions} contextMenuOpen={openContextMenuId === `folder-${f.id}`} onContextMenuOpenChange={(open) => setOpenContextMenuId(open ? `folder-${f.id}` : null)} - isWatched={watchedFolderIds?.has(f.id)} - onRescan={onRescanFolder} - onStopWatching={onStopWatchingFolder} - /> + isWatched={watchedFolderIds?.has(f.id)} + onRescan={onRescanFolder} + onStopWatching={onStopWatchingFolder} + /> ); if (isExpanded) { diff --git a/surfsense_web/components/documents/version-history.tsx b/surfsense_web/components/documents/version-history.tsx index 7aba92b47..27343dc6a 100644 --- a/surfsense_web/components/documents/version-history.tsx +++ b/surfsense_web/components/documents/version-history.tsx @@ -1,19 +1,14 @@ "use client"; -import { useCallback, useEffect, useState } from "react"; import { Check, ChevronRight, Clock, Copy, RotateCcw } from "lucide-react"; +import { useCallback, useEffect, useState } from "react"; +import { toast } from "sonner"; import { Button } from "@/components/ui/button"; -import { - Dialog, - DialogContent, - DialogTitle, - DialogTrigger, -} from "@/components/ui/dialog"; +import { Dialog, DialogContent, DialogTitle, DialogTrigger } from "@/components/ui/dialog"; import { Separator } from "@/components/ui/separator"; import { Spinner } from "@/components/ui/spinner"; -import { cn } from "@/lib/utils"; import { documentsApiService } from "@/lib/apis/documents-api.service"; -import { toast } from "sonner"; +import { cn } from "@/lib/utils"; interface DocumentVersionSummary { version_number: number; @@ -123,10 +118,9 @@ function VersionHistoryPanel({ documentId }: { documentId: number }) { setSelectedVersion(versionNumber); setContentLoading(true); try { - const data = (await documentsApiService.getDocumentVersion( - documentId, - versionNumber - )) as { source_markdown: string }; + const data = (await documentsApiService.getDocumentVersion(documentId, versionNumber)) as { + source_markdown: string; + }; setVersionContent(data.source_markdown || ""); } catch { toast.error("Failed to load version content"); @@ -196,13 +190,11 @@ function VersionHistoryPanel({ documentId }: { documentId: number }) { >

- {v.created_at ? formatRelativeTime(v.created_at) : `Version ${v.version_number}`} + {v.created_at + ? formatRelativeTime(v.created_at) + : `Version ${v.version_number}`}

- {v.title && ( -

- {v.title} -

- )} + {v.title &&

{v.title}

}
@@ -227,11 +219,7 @@ function VersionHistoryPanel({ documentId }: { documentId: number }) { onClick={handleCopy} disabled={contentLoading || copied} > - {copied ? ( - - ) : ( - - )} + {copied ? : } {copied ? "Copied" : "Copy"}
diff --git a/surfsense_web/components/editor-panel/editor-panel.tsx b/surfsense_web/components/editor-panel/editor-panel.tsx index 59af0ee8d..05bcd2dc0 100644 --- a/surfsense_web/components/editor-panel/editor-panel.tsx +++ b/surfsense_web/components/editor-panel/editor-panel.tsx @@ -54,7 +54,6 @@ function EditorPanelSkeleton() { ); } - export function EditorPanelContent({ documentId, searchSpaceId, @@ -194,24 +193,24 @@ export function EditorPanelContent({ return ( <>
-
-

{displayTitle}

- {isEditableType && editedMarkdown !== null && ( -

Unsaved changes

- )} +
+

{displayTitle}

+ {isEditableType && editedMarkdown !== null && ( +

Unsaved changes

+ )} +
+
+ {editorDoc?.document_type && ( + + )} + {onClose && ( + + )} +
-
- {editorDoc?.document_type && ( - - )} - {onClose && ( - - )} -
-
{isLoading ? ( @@ -233,7 +232,9 @@ export function EditorPanelContent({ ? "Document is processing" : "Document unavailable"}

-

{error || "An unknown error occurred"}

+

+ {error || "An unknown error occurred"} +

) : isLargeDocument ? ( diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index c10c5dc82..aa409e179 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -121,9 +121,7 @@ export function DocumentsSidebar({ } const recovered = await api!.getWatchedFolders(); const ids = new Set( - recovered - .filter((f) => f.rootFolderId != null) - .map((f) => f.rootFolderId as number) + recovered.filter((f) => f.rootFolderId != null).map((f) => f.rootFolderId as number) ); setWatchedFolderIds(ids); return; @@ -133,9 +131,7 @@ export function DocumentsSidebar({ } const ids = new Set( - folders - .filter((f) => f.rootFolderId != null) - .map((f) => f.rootFolderId as number) + folders.filter((f) => f.rootFolderId != null).map((f) => f.rootFolderId as number) ); setWatchedFolderIds(ids); } @@ -305,28 +301,25 @@ export function DocumentsSidebar({ [searchSpaceId] ); - const handleStopWatching = useCallback( - async (folder: FolderDisplay) => { - const api = window.electronAPI; - if (!api) return; + const handleStopWatching = useCallback(async (folder: FolderDisplay) => { + const api = window.electronAPI; + if (!api) return; - const watchedFolders = await api.getWatchedFolders(); - const matched = watchedFolders.find((wf) => wf.rootFolderId === folder.id); - if (!matched) { - toast.error("This folder is not being watched"); - return; - } + const watchedFolders = await api.getWatchedFolders(); + const matched = watchedFolders.find((wf) => wf.rootFolderId === folder.id); + if (!matched) { + toast.error("This folder is not being watched"); + return; + } - await api.removeWatchedFolder(matched.path); - try { - await foldersApiService.stopWatching(folder.id); - } catch (err) { - console.error("[DocumentsSidebar] Failed to clear watched metadata:", err); - } - toast.success(`Stopped watching: ${matched.name}`); - }, - [] - ); + await api.removeWatchedFolder(matched.path); + try { + await foldersApiService.stopWatching(folder.id); + } catch (err) { + console.error("[DocumentsSidebar] Failed to clear watched metadata:", err); + } + toast.success(`Stopped watching: ${matched.name}`); + }, []); const handleRenameFolder = useCallback(async (folder: FolderDisplay, newName: string) => { try { @@ -755,81 +748,83 @@ export function DocumentsSidebar({
- handleCreateFolder(null)} - /> + handleCreateFolder(null)} + />
-
- {deletableSelectedIds.length > 0 && ( -
- -
- )} +
+ {deletableSelectedIds.length > 0 && ( +
+ +
+ )} - { - openEditorPanel({ - documentId: doc.id, - searchSpaceId, - title: doc.title, - }); - }} - onEditDocument={(doc) => { - openEditorPanel({ - documentId: doc.id, - searchSpaceId, - title: doc.title, - }); - }} - onDeleteDocument={(doc) => handleDeleteDocument(doc.id)} - onMoveDocument={handleMoveDocument} - onExportDocument={handleExportDocument} - onVersionHistory={(doc) => setVersionDocId(doc.id)} - activeTypes={activeTypes} - onDropIntoFolder={handleDropIntoFolder} - onReorderFolder={handleReorderFolder} - watchedFolderIds={watchedFolderIds} - onRescanFolder={handleRescanFolder} - onStopWatchingFolder={handleStopWatching} - /> + { + openEditorPanel({ + documentId: doc.id, + searchSpaceId, + title: doc.title, + }); + }} + onEditDocument={(doc) => { + openEditorPanel({ + documentId: doc.id, + searchSpaceId, + title: doc.title, + }); + }} + onDeleteDocument={(doc) => handleDeleteDocument(doc.id)} + onMoveDocument={handleMoveDocument} + onExportDocument={handleExportDocument} + onVersionHistory={(doc) => setVersionDocId(doc.id)} + activeTypes={activeTypes} + onDropIntoFolder={handleDropIntoFolder} + onReorderFolder={handleReorderFolder} + watchedFolderIds={watchedFolderIds} + onRescanFolder={handleRescanFolder} + onStopWatchingFolder={handleStopWatching} + /> +
-
- {versionDocId !== null && ( - { if (!open) setVersionDocId(null); }} - documentId={versionDocId} - /> - )} + {versionDocId !== null && ( + { + if (!open) setVersionDocId(null); + }} + documentId={versionDocId} + /> + )} - {isProcessing ? "Document is processing" : "Document unavailable"}

-

- {error || "An unknown error occurred"} -

+

{error || "An unknown error occurred"}

{!isProcessing && (
-

- Document unavailable -

+

Document unavailable

{documentByChunkFetchingError.message || "An unexpected error occurred. Please try again."} diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx index d1651b7f0..718503318 100644 --- a/surfsense_web/components/settings/llm-role-manager.tsx +++ b/surfsense_web/components/settings/llm-role-manager.tsx @@ -134,24 +134,27 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { preferences?.image_generation_config_id, ]); - const handleRoleAssignment = useCallback(async (prefKey: string, configId: string) => { - const value = configId === "unassigned" ? "" : parseInt(configId); + const handleRoleAssignment = useCallback( + async (prefKey: string, configId: string) => { + const value = configId === "unassigned" ? "" : parseInt(configId); - setAssignments((prev) => ({ ...prev, [prefKey]: value })); - setSavingRole(prefKey); - savingRef.current = true; + setAssignments((prev) => ({ ...prev, [prefKey]: value })); + setSavingRole(prefKey); + savingRef.current = true; - try { - await updatePreferences({ - search_space_id: searchSpaceId, - data: { [prefKey]: value || undefined }, - }); - toast.success("Role assignment updated"); - } finally { - setSavingRole(null); - savingRef.current = false; - } - }, [updatePreferences, searchSpaceId]); + try { + await updatePreferences({ + search_space_id: searchSpaceId, + data: { [prefKey]: value || undefined }, + }); + toast.success("Role assignment updated"); + } finally { + setSavingRole(null); + savingRef.current = false; + } + }, + [updatePreferences, searchSpaceId] + ); // Combine global and custom LLM configs const allLLMConfigs = [ @@ -199,10 +202,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { Refresh {isAssignmentComplete && !isLoading && !hasError && ( - + All roles assigned @@ -483,7 +483,6 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { })}

)} -
); } diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx index 9733bd2e6..f1162f57c 100644 --- a/surfsense_web/components/sources/DocumentUploadTab.tsx +++ b/surfsense_web/components/sources/DocumentUploadTab.tsx @@ -128,7 +128,8 @@ const MAX_TOTAL_SIZE_BYTES = MAX_TOTAL_SIZE_MB * 1024 * 1024; const MAX_FILE_SIZE_MB = 500; const MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024; -const toggleRowClass = "flex items-center justify-between rounded-lg bg-slate-400/5 dark:bg-white/5 p-3"; +const toggleRowClass = + "flex items-center justify-between rounded-lg bg-slate-400/5 dark:bg-white/5 p-3"; export function DocumentUploadTab({ searchSpaceId, @@ -326,7 +327,14 @@ export function DocumentUploadTab({ await api.addWatchedFolder({ path: selectedFolder.path, name: selectedFolder.name, - excludePatterns: [".git", "node_modules", "__pycache__", ".DS_Store", ".obsidian", ".trash"], + excludePatterns: [ + ".git", + "node_modules", + "__pycache__", + ".DS_Store", + ".obsidian", + ".trash", + ], fileExtensions: null, rootFolderId, searchSpaceId: Number(searchSpaceId), @@ -393,12 +401,20 @@ export function DocumentUploadTab({ return ( e.stopPropagation()}> - - e.stopPropagation()}> + e.stopPropagation()} + > Files @@ -415,7 +431,11 @@ export function DocumentUploadTab({ return ( e.stopPropagation()}> - @@ -457,21 +477,19 @@ export function DocumentUploadTab({ {/* MOBILE DROP ZONE */}
{hasContent ? ( - !selectedFolder && !isFileCountLimitReached && ( - isElectron ? ( -
- {renderBrowseButton({ compact: true, fullWidth: true })} -
- ) : ( - - ) - ) + !selectedFolder && + !isFileCountLimitReached && + (isElectron ? ( +
{renderBrowseButton({ compact: true, fullWidth: true })}
+ ) : ( + + )) ) : (
{t("file_size_limit")} - {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} + + {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} +

e.stopPropagation()}> @@ -538,7 +558,9 @@ export function DocumentUploadTab({

{t("file_size_limit")} - {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} + + {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} +

{renderBrowseButton()}
@@ -569,9 +591,7 @@ export function DocumentUploadTab({

Watch folder

-

- Auto-sync when files change -

+

Auto-sync when files change

- {t("selected_files", { count: files.length })} · {formatFileSize(totalFileSize)} + {t("selected_files", { count: files.length })} ·{" "} + {formatFileSize(totalFileSize)}

e.stopPropagation()}> {renderBrowseButton({ fullWidth: true })} @@ -490,9 +488,7 @@ export function DocumentUploadTab({
- {isDragActive - ? t("drop_files") - : t("drag_drop_more")} + {isDragActive ? t("drop_files") : t("drag_drop_more")} {renderBrowseButton({ compact: true })}
@@ -505,9 +501,7 @@ export function DocumentUploadTab({

{t("drag_drop")}

-

- {t("file_size_limit")} -

+

{t("file_size_limit")}

{renderBrowseButton()}
)} @@ -578,7 +572,9 @@ export function DocumentUploadTab({

- {t("selected_files", { count: files.length })}{formatFileSize(totalFileSize)} + {t("selected_files", { count: files.length })} + + {formatFileSize(totalFileSize)}

- e.stopPropagation()}> + e.stopPropagation()} + > fileInputRef.current?.click()}> {t("browse_files")} @@ -492,17 +496,20 @@ export function DocumentUploadTab({ {renderBrowseButton({ compact: true })}
- ) : isDragActive ? ( -
- -

{t("drop_files")}

-
) : ( -
- -

{t("drag_drop")}

-

{t("file_size_limit")}

-
{renderBrowseButton()}
+
+ {isDragActive && ( +
+ +

{t("drop_files")}

+
+ )} +
+ +

{t("drag_drop")}

+

{t("file_size_limit")}

+
{renderBrowseButton()}
+
)}
From 9c0af6569d1e94744ad5361b9611efee0f57269c Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 3 Apr 2026 19:13:25 +0530 Subject: [PATCH 61/61] feat: implement page limit checks in local folder indexing to manage user page usage --- .../local_folder_indexer.py | 88 ++++++- .../test_local_folder_pipeline.py | 219 ++++++++++++++++++ 2 files changed, 303 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 58c9f5003..acfbce0bf 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -34,6 +34,7 @@ from app.indexing_pipeline.connector_document import ConnectorDocument from app.indexing_pipeline.document_hashing import compute_identifier_hash from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService from app.services.llm_service import get_user_long_context_llm +from app.services.page_limit_service import PageLimitExceededError, PageLimitService from app.services.task_logging_service import TaskLoggingService from app.tasks.celery_tasks import get_celery_session_maker from app.utils.document_versioning import create_version_snapshot @@ -171,6 +172,39 @@ def _needs_etl(filename: str) -> bool: HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +def _estimate_pages_safe(page_limit_service: PageLimitService, file_path: str) -> int: + """Estimate page count with a file-size fallback.""" + try: + return page_limit_service.estimate_pages_before_processing(file_path) + except Exception: + file_size = os.path.getsize(file_path) + return max(1, file_size // (80 * 1024)) + + +async def _check_page_limit_or_skip( + page_limit_service: PageLimitService, + user_id: str, + file_path: str, +) -> int: + """Estimate pages and check the limit; raises PageLimitExceededError if over quota. + + Returns the estimated page count on success. + """ + estimated = _estimate_pages_safe(page_limit_service, file_path) + await page_limit_service.check_page_limit(user_id, estimated) + return estimated + + +def _compute_final_pages( + page_limit_service: PageLimitService, + estimated_pages: int, + content_length: int, +) -> int: + """Return the final page count as max(estimated, actual).""" + actual = page_limit_service.estimate_pages_from_content_length(content_length) + return max(estimated_pages, actual) + DEFAULT_EXCLUDE_PATTERNS = [ ".git", "node_modules", @@ -720,11 +754,12 @@ async def index_local_folder( skipped_count = 0 failed_count = 0 + page_limit_service = PageLimitService(session) + # ================================================================ # PHASE 1: Pre-filter files (mtime / content-hash), version changed # ================================================================ connector_docs: list[ConnectorDocument] = [] - # Maps unique_id -> (relative_path, mtime) for post-pipeline folder_id assignment file_meta_map: dict[str, dict] = {} seen_unique_hashes: set[str] = set() @@ -759,6 +794,17 @@ async def index_local_folder( skipped_count += 1 continue + try: + estimated_pages = await _check_page_limit_or_skip( + page_limit_service, user_id, file_path_abs + ) + except PageLimitExceededError: + logger.warning( + f"Page limit exceeded, skipping: {file_path_abs}" + ) + failed_count += 1 + continue + try: content, content_hash = await _compute_file_content_hash( file_path_abs, file_info["relative_path"], search_space_id @@ -781,6 +827,17 @@ async def index_local_folder( await create_version_snapshot(session, existing_document) else: + try: + estimated_pages = await _check_page_limit_or_skip( + page_limit_service, user_id, file_path_abs + ) + except PageLimitExceededError: + logger.warning( + f"Page limit exceeded, skipping: {file_path_abs}" + ) + failed_count += 1 + continue + try: content, content_hash = await _compute_file_content_hash( file_path_abs, file_info["relative_path"], search_space_id @@ -807,6 +864,8 @@ async def index_local_folder( file_meta_map[unique_identifier] = { "relative_path": relative_path, "mtime": file_info["modified_at"].timestamp(), + "estimated_pages": estimated_pages, + "content_length": len(content), } except Exception as e: @@ -901,6 +960,15 @@ async def index_local_folder( doc_meta = dict(result.document_metadata or {}) doc_meta["mtime"] = mtime_info.get("mtime") result.document_metadata = doc_meta + + est = mtime_info.get("estimated_pages", 1) + content_len = mtime_info.get("content_length", 0) + final_pages = _compute_final_pages( + page_limit_service, est, content_len + ) + await page_limit_service.update_page_usage( + user_id, final_pages, allow_exceed=True + ) else: failed_count += 1 @@ -1084,6 +1152,14 @@ async def _index_single_file( DocumentType.LOCAL_FOLDER_FILE.value, unique_id, search_space_id ) + page_limit_service = PageLimitService(session) + try: + estimated_pages = await _check_page_limit_or_skip( + page_limit_service, user_id, str(full_path) + ) + except PageLimitExceededError as e: + return 0, 1, f"Page limit exceeded: {e}" + try: content, content_hash = await _compute_file_content_hash( str(full_path), full_path.name, search_space_id @@ -1128,8 +1204,6 @@ async def _index_single_file( db_doc = documents[0] - # Assign folder_id before indexing so the doc appears in the - # correct folder while still pending/processing. if root_folder_id: try: db_doc.folder_id = await _resolve_folder_for_file( @@ -1154,10 +1228,16 @@ async def _index_single_file( failed_msg = None if indexed else "Indexing failed" if indexed: + final_pages = _compute_final_pages( + page_limit_service, estimated_pages, len(content) + ) + await page_limit_service.update_page_usage( + user_id, final_pages, allow_exceed=True + ) await task_logger.log_task_success( log_entry, f"Single file indexed: {rel_path}", - {"file": rel_path}, + {"file": rel_path, "pages_processed": final_pages}, ) return indexed, 0 if indexed else 1, failed_msg diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py index b94762ee6..4d9bda7ee 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py @@ -959,3 +959,222 @@ class TestDirectConvert: assert "| name" in doc.source_markdown assert "name,age,city" not in doc.source_markdown + + +# ==================================================================== +# Tier 8: Page Limits (PL1-PL6) +# ==================================================================== + + +class TestPageLimits: + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl1_full_scan_increments_pages_used( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL1: Successful full-scan sync increments user.pages_used.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 0 + db_user.pages_limit = 500 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert err is None + assert count == 1 + + await db_session.refresh(db_user) + assert db_user.pages_used > 0, "pages_used should increase after indexing" + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl2_full_scan_blocked_when_limit_exhausted( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL2: Full-scan skips file when page limit is exhausted.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 100 + db_user.pages_limit = 100 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert count == 0 + + await db_session.refresh(db_user) + assert db_user.pages_used == 100, "pages_used should not change on rejection" + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl3_single_file_increments_pages_used( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL3: Single-file mode increments user.pages_used on success.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 0 + db_user.pages_limit = 500 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[str(tmp_path / "note.md")], + ) + + assert err is None + assert count == 1 + + await db_session.refresh(db_user) + assert db_user.pages_used > 0, "pages_used should increase after indexing" + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl4_single_file_blocked_when_limit_exhausted( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL4: Single-file mode skips file when page limit is exhausted.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 100 + db_user.pages_limit = 100 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[str(tmp_path / "note.md")], + ) + + assert count == 0 + assert err is not None + assert "page limit" in err.lower() + + await db_session.refresh(db_user) + assert db_user.pages_used == 100, "pages_used should not change on rejection" + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl5_unchanged_resync_no_extra_pages( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL5: Re-syncing an unchanged file does not consume additional pages.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 0 + db_user.pages_limit = 500 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello\n\nSame content.") + + count1, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + assert count1 == 1 + + await db_session.refresh(db_user) + pages_after_first = db_user.pages_used + assert pages_after_first > 0 + + count2, _, _, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, + ) + assert count2 == 0 + + await db_session.refresh(db_user) + assert db_user.pages_used == pages_after_first, ( + "pages_used should not increase for unchanged files" + ) + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl6_batch_partial_page_limit_exhaustion( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + patched_batch_sessions, + ): + """PL6: Batch mode with a very low page limit: some files succeed, rest fail.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 0 + db_user.pages_limit = 1 + await db_session.flush() + + (tmp_path / "a.md").write_text("File A content") + (tmp_path / "b.md").write_text("File B content") + (tmp_path / "c.md").write_text("File C content") + + count, failed, _root_folder_id, _err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[ + str(tmp_path / "a.md"), + str(tmp_path / "b.md"), + str(tmp_path / "c.md"), + ], + ) + + assert count >= 1, "at least one file should succeed" + assert failed >= 1, "at least one file should fail due to page limit" + assert count + failed == 3 + + await db_session.refresh(db_user) + assert db_user.pages_used > 0 + assert db_user.pages_used <= db_user.pages_limit + 1