diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index b75c56663..dd7b56033 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -22,6 +22,7 @@ from app.schemas import ( DocumentsCreate, DocumentUpdate, DocumentWithChunksRead, + PaginatedResponse, ) from app.services.task_logging_service import TaskLoggingService from app.tasks.document_processors import ( @@ -154,15 +155,36 @@ async def create_documents_file_upload( ) from e -@router.get("/documents/", response_model=list[DocumentRead]) +@router.get("/documents/", response_model=PaginatedResponse[DocumentRead]) async def read_documents( - skip: int = 0, - limit: int = 3000, + skip: int | None = None, + page: int | None = None, + page_size: int = 50, search_space_id: int | None = None, session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), ): + """ + List documents owned by the current user, with optional filtering and pagination. + + Args: + skip: Absolute number of items to skip from the beginning. If provided, it takes precedence over 'page'. + page: Zero-based page index used when 'skip' is not provided. + page_size: Number of items per page (default: 50). Use -1 to return all remaining items after the offset. + search_space_id: If provided, restrict results to a specific search space. + session: Database session (injected). + user: Current authenticated user (injected). + + Returns: + PaginatedResponse[DocumentRead]: Paginated list of documents visible to the user. + + Notes: + - If both 'skip' and 'page' are provided, 'skip' is used. + - Results are scoped to documents owned by the current user. 
+ """ try: + from sqlalchemy import func + query = ( select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id) ) @@ -171,7 +193,33 @@ async def read_documents( if search_space_id is not None: query = query.filter(Document.search_space_id == search_space_id) - result = await session.execute(query.offset(skip).limit(limit)) + # Get total count + count_query = ( + select(func.count()) + .select_from(Document) + .join(SearchSpace) + .filter(SearchSpace.user_id == user.id) + ) + if search_space_id is not None: + count_query = count_query.filter( + Document.search_space_id == search_space_id + ) + total_result = await session.execute(count_query) + total = total_result.scalar() or 0 + + # Calculate offset + offset = 0 + if skip is not None: + offset = skip + elif page is not None: + offset = page * page_size + + # Get paginated results + if page_size == -1: + result = await session.execute(query.offset(offset)) + else: + result = await session.execute(query.offset(offset).limit(page_size)) + db_documents = result.scalars().all() # Convert database objects to API-friendly format @@ -189,13 +237,106 @@ async def read_documents( ) ) - return api_documents + return PaginatedResponse(items=api_documents, total=total) except Exception as e: raise HTTPException( status_code=500, detail=f"Failed to fetch documents: {e!s}" ) from e +@router.get("/documents/search/", response_model=PaginatedResponse[DocumentRead]) +async def search_documents( + title: str, + skip: int | None = None, + page: int | None = None, + page_size: int = 50, + search_space_id: int | None = None, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Search documents by title substring, optionally filtered by search_space_id. + + Args: + title: Case-insensitive substring to match against document titles. Required. + skip: Absolute number of items to skip from the beginning. If provided, it takes precedence over 'page'. Default: None. 
+ page: Zero-based page index used when 'skip' is not provided. Default: None. + page_size: Number of items per page. Use -1 to return all remaining items after the offset. Default: 50. + search_space_id: Filter results to a specific search space. Default: None. + session: Database session (injected). + user: Current authenticated user (injected). + + Returns: + PaginatedResponse[DocumentRead]: Paginated list of documents matching the query and filter. + + Notes: + - Title matching uses ILIKE (case-insensitive). + - If both 'skip' and 'page' are provided, 'skip' is used. + """ + try: + from sqlalchemy import func + + query = ( + select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id) + ) + if search_space_id is not None: + query = query.filter(Document.search_space_id == search_space_id) + + # Only search by title (case-insensitive) + query = query.filter(Document.title.ilike(f"%{title}%")) + + # Get total count + count_query = ( + select(func.count()) + .select_from(Document) + .join(SearchSpace) + .filter(SearchSpace.user_id == user.id) + ) + if search_space_id is not None: + count_query = count_query.filter( + Document.search_space_id == search_space_id + ) + count_query = count_query.filter(Document.title.ilike(f"%{title}%")) + total_result = await session.execute(count_query) + total = total_result.scalar() or 0 + + # Calculate offset + offset = 0 + if skip is not None: + offset = skip + elif page is not None: + offset = page * page_size + + # Get paginated results + if page_size == -1: + result = await session.execute(query.offset(offset)) + else: + result = await session.execute(query.offset(offset).limit(page_size)) + + db_documents = result.scalars().all() + + # Convert database objects to API-friendly format + api_documents = [] + for doc in db_documents: + api_documents.append( + DocumentRead( + id=doc.id, + title=doc.title, + document_type=doc.document_type, + document_metadata=doc.document_metadata, + content=doc.content, + 
created_at=doc.created_at, + search_space_id=doc.search_space_id, + ) + ) + + return PaginatedResponse(items=api_documents, total=total) + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed to search documents: {e!s}" + ) from e + + @router.get("/documents/{document_id}", response_model=DocumentRead) async def read_document( document_id: int, diff --git a/surfsense_backend/app/schemas/__init__.py b/surfsense_backend/app/schemas/__init__.py index ca9287a16..41b2ce23c 100644 --- a/surfsense_backend/app/schemas/__init__.py +++ b/surfsense_backend/app/schemas/__init__.py @@ -16,6 +16,7 @@ from .documents import ( DocumentWithChunksRead, ExtensionDocumentContent, ExtensionDocumentMetadata, + PaginatedResponse, ) from .llm_config import LLMConfigBase, LLMConfigCreate, LLMConfigRead, LLMConfigUpdate from .logs import LogBase, LogCreate, LogFilter, LogRead, LogUpdate @@ -68,6 +69,7 @@ __all__ = [ "LogFilter", "LogRead", "LogUpdate", + "PaginatedResponse", "PodcastBase", "PodcastCreate", "PodcastGenerateRequest", diff --git a/surfsense_backend/app/schemas/documents.py b/surfsense_backend/app/schemas/documents.py index bdaf568e7..80f6dc0a7 100644 --- a/surfsense_backend/app/schemas/documents.py +++ b/surfsense_backend/app/schemas/documents.py @@ -1,11 +1,14 @@ from datetime import datetime from pydantic import BaseModel, ConfigDict +from typing import Generic, TypeVar from app.db import DocumentType from .chunks import ChunkRead +T = TypeVar("T") + class ExtensionDocumentMetadata(BaseModel): BrowsingSessionId: str @@ -53,3 +56,8 @@ class DocumentWithChunksRead(DocumentRead): chunks: list[ChunkRead] = [] model_config = ConfigDict(from_attributes=True) + + +class PaginatedResponse(BaseModel, Generic[T]): + items: list[T] + total: int diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx index 4a69a7533..67b55c105 100644 --- 
a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx @@ -2,7 +2,7 @@ import { motion } from "framer-motion"; import { useParams } from "next/navigation"; -import { useEffect, useId, useMemo, useState } from "react"; +import { useCallback, useEffect, useId, useMemo, useState } from "react"; import { toast } from "sonner"; import { useDocuments } from "@/hooks/use-documents"; @@ -26,10 +26,6 @@ export default function DocumentsTable() { const params = useParams(); const searchSpaceId = Number(params.search_space_id); - const { documents, loading, error, refreshDocuments, deleteDocument } = - useDocuments(searchSpaceId); - - const [data, setData] = useState([]); const [search, setSearch] = useState(""); const debouncedSearch = useDebounced(search, 250); const [activeTypes, setActiveTypes] = useState([]); @@ -45,26 +41,48 @@ export default function DocumentsTable() { const [sortDesc, setSortDesc] = useState(false); const [selectedIds, setSelectedIds] = useState>(new Set()); - useEffect(() => { - if (documents) setData(documents as Document[]); - }, [documents]); + // Use server-side pagination and search + const { + documents, + total, + loading, + error, + fetchDocuments, + searchDocuments, + deleteDocument, + } = useDocuments(searchSpaceId, { + page: pageIndex, + pageSize: pageSize, + }); - const filtered = useMemo(() => { - let result = data; - if (debouncedSearch.trim()) { - const q = debouncedSearch.toLowerCase(); - result = result.filter((d) => d.title.toLowerCase().includes(q)); + // Refetch when pagination changes or when search/filters change + useEffect(() => { + if (searchSpaceId) { + if (debouncedSearch.trim()) { + // Use search endpoint if there's a search query + searchDocuments?.(debouncedSearch, pageIndex, pageSize); + } else { + // Use regular fetch if no search + fetchDocuments?.(pageIndex, pageSize); + } } + }, [pageIndex, pageSize, debouncedSearch, 
searchSpaceId, fetchDocuments, searchDocuments]); + + // Client-side filtering for document types only + // Note: This could also be moved to the backend for better performance + const filtered = useMemo(() => { + let result = documents || []; if (activeTypes.length > 0) { result = result.filter((d) => activeTypes.includes(d.document_type)); } return result; - }, [data, debouncedSearch, activeTypes]); + }, [documents, activeTypes]); - const total = filtered.length; + // Display filtered results + const displayDocs = filtered; + const displayTotal = activeTypes.length > 0 ? filtered.length : total; const pageStart = pageIndex * pageSize; - const pageEnd = Math.min(pageStart + pageSize, total); - const pageDocs = filtered.slice(pageStart, pageEnd); + const pageEnd = Math.min(pageStart + pageSize, displayTotal); const onToggleType = (type: string, checked: boolean) => { setActiveTypes((prev) => (checked ? [...prev, type] : prev.filter((t) => t !== type))); @@ -75,6 +93,14 @@ export default function DocumentsTable() { setColumnVisibility((prev) => ({ ...prev, [id]: checked })); }; + const refreshCurrentView = useCallback(async () => { + if (debouncedSearch.trim()) { + await searchDocuments?.(debouncedSearch, pageIndex, pageSize); + } else { + await fetchDocuments?.(pageIndex, pageSize); + } + }, [debouncedSearch, pageIndex, pageSize, searchDocuments, fetchDocuments]); + const onBulkDelete = async () => { if (selectedIds.size === 0) { toast.error("No rows selected"); @@ -86,7 +112,8 @@ export default function DocumentsTable() { if (okCount === selectedIds.size) toast.success(`Successfully deleted ${okCount} document(s)`); else toast.error("Some documents could not be deleted"); - await refreshDocuments?.(); + // Refetch the current page with appropriate method + await refreshCurrentView(); setSelectedIds(new Set()); } catch (e) { console.error(e); @@ -113,8 +140,8 @@ export default function DocumentsTable() { className="w-full px-6 py-4" > { - await 
(refreshDocuments?.() ?? Promise.resolve()); - }} + onRefresh={refreshCurrentView} selectedIds={selectedIds} setSelectedIds={setSelectedIds} columnVisibility={columnVisibility} @@ -147,20 +172,22 @@ export default function DocumentsTable() { }} /> + +export { DocumentsTable }; { setPageSize(s); setPageIndex(0); }} onFirst={() => setPageIndex(0)} onPrev={() => setPageIndex((i) => Math.max(0, i - 1))} - onNext={() => setPageIndex((i) => (pageEnd < total ? i + 1 : i))} - onLast={() => setPageIndex(Math.max(0, Math.ceil(total / pageSize) - 1))} + onNext={() => setPageIndex((i) => (pageEnd < displayTotal ? i + 1 : i))} + onLast={() => setPageIndex(Math.max(0, Math.ceil(displayTotal / pageSize) - 1))} canPrev={pageIndex > 0} - canNext={pageEnd < total} + canNext={pageEnd < displayTotal} id={id} /> diff --git a/surfsense_web/components/chat/ChatInputGroup.tsx b/surfsense_web/components/chat/ChatInputGroup.tsx index 1f5cc6d31..8085af6eb 100644 --- a/surfsense_web/components/chat/ChatInputGroup.tsx +++ b/surfsense_web/components/chat/ChatInputGroup.tsx @@ -42,7 +42,10 @@ const DocumentSelector = React.memo( const { documents, loading, isLoaded, fetchDocuments } = useDocuments( Number(search_space_id), - true + { + lazy: true, + pageSize: -1, // Fetch all documents with large page size + } ); const handleOpenChange = useCallback( diff --git a/surfsense_web/hooks/use-documents.ts b/surfsense_web/hooks/use-documents.ts index 4c7536689..9dba04e93 100644 --- a/surfsense_web/hooks/use-documents.ts +++ b/surfsense_web/hooks/use-documents.ts @@ -1,6 +1,7 @@ "use client"; import { useCallback, useEffect, useState } from "react"; import { toast } from "sonner"; +import { normalizeListResponse } from "@/lib/pagination"; export interface Document { id: number; @@ -29,43 +30,79 @@ export type DocumentType = | "GOOGLE_GMAIL_CONNECTOR" | "AIRTABLE_CONNECTOR"; -export function useDocuments(searchSpaceId: number, lazy: boolean = false) { +export interface UseDocumentsOptions { + page?: 
number; + pageSize?: number; + lazy?: boolean; +} + +export function useDocuments( + searchSpaceId: number, + options?: UseDocumentsOptions | boolean +) { + // Support both old boolean API and new options API for backward compatibility + const opts = typeof options === "boolean" ? { lazy: options } : options || {}; + const { page, pageSize = 300, lazy = false } = opts; + const [documents, setDocuments] = useState([]); + const [total, setTotal] = useState(0); const [loading, setLoading] = useState(!lazy); // Don't show loading initially for lazy mode const [error, setError] = useState(null); const [isLoaded, setIsLoaded] = useState(false); // Memoization flag - const fetchDocuments = useCallback(async () => { - if (isLoaded && lazy) return; // Avoid redundant calls in lazy mode + const fetchDocuments = useCallback( + async (fetchPage?: number, fetchPageSize?: number) => { + if (isLoaded && lazy) return; // Avoid redundant calls in lazy mode - try { - setLoading(true); - const response = await fetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents?search_space_id=${searchSpaceId}`, - { - headers: { - Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`, - }, - method: "GET", + try { + setLoading(true); + + // Build query params + const params = new URLSearchParams({ + search_space_id: searchSpaceId.toString(), + }); + + // Use passed parameters or fall back to state/options + const effectivePage = fetchPage !== undefined ? fetchPage : page; + const effectivePageSize = fetchPageSize !== undefined ? 
fetchPageSize : pageSize; + + if (effectivePage !== undefined) { + params.append("page", effectivePage.toString()); + } + if (effectivePageSize !== undefined) { + params.append("page_size", effectivePageSize.toString()); } - ); - if (!response.ok) { - toast.error("Failed to fetch documents"); - throw new Error("Failed to fetch documents"); + const response = await fetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents?${params.toString()}`, + { + headers: { + Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`, + }, + method: "GET", + } + ); + + if (!response.ok) { + toast.error("Failed to fetch documents"); + throw new Error("Failed to fetch documents"); + } + + const data = await response.json(); + const normalized = normalizeListResponse(data); + setDocuments(normalized.items); + setTotal(normalized.total); + setError(null); + setIsLoaded(true); + } catch (err: any) { + setError(err.message || "Failed to fetch documents"); + console.error("Error fetching documents:", err); + } finally { + setLoading(false); } - - const data = await response.json(); - setDocuments(data); - setError(null); - setIsLoaded(true); - } catch (err: any) { - setError(err.message || "Failed to fetch documents"); - console.error("Error fetching documents:", err); - } finally { - setLoading(false); - } - }, [searchSpaceId, isLoaded, lazy]); + }, + [searchSpaceId, page, pageSize, isLoaded, lazy] + ); useEffect(() => { if (!lazy && searchSpaceId) { @@ -79,6 +116,64 @@ export function useDocuments(searchSpaceId: number, lazy: boolean = false) { await fetchDocuments(); }, [fetchDocuments]); + // Function to search documents by title + const searchDocuments = useCallback( + async (searchQuery: string, fetchPage?: number, fetchPageSize?: number) => { + if (!searchQuery.trim()) { + // If search is empty, fetch all documents + return fetchDocuments(fetchPage, fetchPageSize); + } + + try { + setLoading(true); + + // Build query params + const params = new 
URLSearchParams({ + search_space_id: searchSpaceId.toString(), + title: searchQuery, + }); + + // Use passed parameters or fall back to state/options + const effectivePage = fetchPage !== undefined ? fetchPage : page; + const effectivePageSize = fetchPageSize !== undefined ? fetchPageSize : pageSize; + + if (effectivePage !== undefined) { + params.append("page", effectivePage.toString()); + } + if (effectivePageSize !== undefined) { + params.append("page_size", effectivePageSize.toString()); + } + + const response = await fetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents/search/?${params.toString()}`, + { + headers: { + Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`, + }, + method: "GET", + } + ); + + if (!response.ok) { + toast.error("Failed to search documents"); + throw new Error("Failed to search documents"); + } + + const data = await response.json(); + const normalized = normalizeListResponse(data); + setDocuments(normalized.items); + setTotal(normalized.total); + setError(null); + } catch (err: any) { + setError(err.message || "Failed to search documents"); + console.error("Error searching documents:", err); + } finally { + setLoading(false); + } + }, + [searchSpaceId, page, pageSize, fetchDocuments] + ); + // Function to delete a document const deleteDocument = useCallback( async (documentId: number) => { @@ -113,10 +208,12 @@ export function useDocuments(searchSpaceId: number, lazy: boolean = false) { return { documents, + total, loading, error, isLoaded, fetchDocuments, // Manual fetch function for lazy mode + searchDocuments, // Search function refreshDocuments, deleteDocument, }; diff --git a/surfsense_web/lib/pagination.ts b/surfsense_web/lib/pagination.ts new file mode 100644 index 000000000..0c618e56e --- /dev/null +++ b/surfsense_web/lib/pagination.ts @@ -0,0 +1,34 @@ +// Helper to normalize list responses from the API +// Supports shapes: Array, { items: T[]; total: number }, and tuple [T[], total] 
+export type ListResponse<T> = {
+	items: T[];
+	total: number;
+};
+
+// Coerce any supported payload shape to { items, total }; unrecognized input yields an empty list.
+export function normalizeListResponse<T>(payload: any): ListResponse<T> {
+	try {
+		// Case 1: already in desired shape; a missing/non-numeric total falls back to items.length
+		if (payload && Array.isArray(payload.items)) {
+			const total = typeof payload.total === "number" ? payload.total : payload.items.length;
+			return { items: payload.items as T[], total };
+		}
+
+		// Case 2: tuple [items, total]; checked before Case 3 because a tuple is also an array
+		if (Array.isArray(payload) && payload.length === 2 && Array.isArray(payload[0])) {
+			const items = payload[0] as T[]; // already verified to be an array by the guard above
+			const total = typeof payload[1] === "number" ? payload[1] : items.length;
+			return { items, total };
+		}
+
+		// Case 3: plain array
+		if (Array.isArray(payload)) {
+			return { items: payload as T[], total: (payload as T[]).length };
+		}
+	} catch (e) {
+		// Defensive: if inspecting the payload throws, fall through to the empty default
+	}
+
+	return { items: [], total: 0 };
+}
+