Merge pull request #349 from NatsumeRyuhane/dev

Fix #320: Use serverside pagination
This commit is contained in:
Rohan Verma 2025-10-01 18:32:41 -07:00 committed by GitHub
commit 35a7427b00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 374 additions and 62 deletions

View file

@ -22,6 +22,7 @@ from app.schemas import (
DocumentsCreate, DocumentsCreate,
DocumentUpdate, DocumentUpdate,
DocumentWithChunksRead, DocumentWithChunksRead,
PaginatedResponse,
) )
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.tasks.document_processors import ( from app.tasks.document_processors import (
@ -154,15 +155,36 @@ async def create_documents_file_upload(
) from e ) from e
@router.get("/documents/", response_model=list[DocumentRead]) @router.get("/documents/", response_model=PaginatedResponse[DocumentRead])
async def read_documents( async def read_documents(
skip: int = 0, skip: int | None = None,
limit: int = 300, page: int | None = None,
page_size: int = 50,
search_space_id: int | None = None, search_space_id: int | None = None,
session: AsyncSession = Depends(get_async_session), session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user), user: User = Depends(current_active_user),
): ):
"""
List documents owned by the current user, with optional filtering and pagination.
Args:
skip: Absolute number of items to skip from the beginning. If provided, it takes precedence over 'page'.
page: Zero-based page index used when 'skip' is not provided.
page_size: Number of items per page (default: 50). Use -1 to return all remaining items after the offset.
search_space_id: If provided, restrict results to a specific search space.
session: Database session (injected).
user: Current authenticated user (injected).
Returns:
PaginatedResponse[DocumentRead]: Paginated list of documents visible to the user.
Notes:
- If both 'skip' and 'page' are provided, 'skip' is used.
- Results are scoped to documents owned by the current user.
"""
try: try:
from sqlalchemy import func
query = ( query = (
select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id) select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id)
) )
@ -171,7 +193,33 @@ async def read_documents(
if search_space_id is not None: if search_space_id is not None:
query = query.filter(Document.search_space_id == search_space_id) query = query.filter(Document.search_space_id == search_space_id)
result = await session.execute(query.offset(skip).limit(limit)) # Get total count
count_query = (
select(func.count())
.select_from(Document)
.join(SearchSpace)
.filter(SearchSpace.user_id == user.id)
)
if search_space_id is not None:
count_query = count_query.filter(
Document.search_space_id == search_space_id
)
total_result = await session.execute(count_query)
total = total_result.scalar() or 0
# Calculate offset
offset = 0
if skip is not None:
offset = skip
elif page is not None:
offset = page * page_size
# Get paginated results
if page_size == -1:
result = await session.execute(query.offset(offset))
else:
result = await session.execute(query.offset(offset).limit(page_size))
db_documents = result.scalars().all() db_documents = result.scalars().all()
# Convert database objects to API-friendly format # Convert database objects to API-friendly format
@ -189,13 +237,106 @@ async def read_documents(
) )
) )
return api_documents return PaginatedResponse(items=api_documents, total=total)
except Exception as e: except Exception as e:
raise HTTPException( raise HTTPException(
status_code=500, detail=f"Failed to fetch documents: {e!s}" status_code=500, detail=f"Failed to fetch documents: {e!s}"
) from e ) from e
@router.get("/documents/search/", response_model=PaginatedResponse[DocumentRead])
async def search_documents(
title: str,
skip: int | None = None,
page: int | None = None,
page_size: int = 50,
search_space_id: int | None = None,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Search documents by title substring, optionally filtered by search_space_id.
Args:
title: Case-insensitive substring to match against document titles. Required.
skip: Absolute number of items to skip from the beginning. If provided, it takes precedence over 'page'. Default: None.
page: Zero-based page index used when 'skip' is not provided. Default: None.
page_size: Number of items per page. Use -1 to return all remaining items after the offset. Default: 50.
search_space_id: Filter results to a specific search space. Default: None.
session: Database session (injected).
user: Current authenticated user (injected).
Returns:
PaginatedResponse[DocumentRead]: Paginated list of documents matching the query and filter.
Notes:
- Title matching uses ILIKE (case-insensitive).
- If both 'skip' and 'page' are provided, 'skip' is used.
"""
try:
from sqlalchemy import func
query = (
select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id)
)
if search_space_id is not None:
query = query.filter(Document.search_space_id == search_space_id)
# Only search by title (case-insensitive)
query = query.filter(Document.title.ilike(f"%{title}%"))
# Get total count
count_query = (
select(func.count())
.select_from(Document)
.join(SearchSpace)
.filter(SearchSpace.user_id == user.id)
)
if search_space_id is not None:
count_query = count_query.filter(
Document.search_space_id == search_space_id
)
count_query = count_query.filter(Document.title.ilike(f"%{title}%"))
total_result = await session.execute(count_query)
total = total_result.scalar() or 0
# Calculate offset
offset = 0
if skip is not None:
offset = skip
elif page is not None:
offset = page * page_size
# Get paginated results
if page_size == -1:
result = await session.execute(query.offset(offset))
else:
result = await session.execute(query.offset(offset).limit(page_size))
db_documents = result.scalars().all()
# Convert database objects to API-friendly format
api_documents = []
for doc in db_documents:
api_documents.append(
DocumentRead(
id=doc.id,
title=doc.title,
document_type=doc.document_type,
document_metadata=doc.document_metadata,
content=doc.content,
created_at=doc.created_at,
search_space_id=doc.search_space_id,
)
)
return PaginatedResponse(items=api_documents, total=total)
except Exception as e:
raise HTTPException(
status_code=500, detail=f"Failed to search documents: {e!s}"
) from e
@router.get("/documents/{document_id}", response_model=DocumentRead) @router.get("/documents/{document_id}", response_model=DocumentRead)
async def read_document( async def read_document(
document_id: int, document_id: int,

View file

@ -16,6 +16,7 @@ from .documents import (
DocumentWithChunksRead, DocumentWithChunksRead,
ExtensionDocumentContent, ExtensionDocumentContent,
ExtensionDocumentMetadata, ExtensionDocumentMetadata,
PaginatedResponse,
) )
from .llm_config import LLMConfigBase, LLMConfigCreate, LLMConfigRead, LLMConfigUpdate from .llm_config import LLMConfigBase, LLMConfigCreate, LLMConfigRead, LLMConfigUpdate
from .logs import LogBase, LogCreate, LogFilter, LogRead, LogUpdate from .logs import LogBase, LogCreate, LogFilter, LogRead, LogUpdate
@ -68,6 +69,7 @@ __all__ = [
"LogFilter", "LogFilter",
"LogRead", "LogRead",
"LogUpdate", "LogUpdate",
"PaginatedResponse",
"PodcastBase", "PodcastBase",
"PodcastCreate", "PodcastCreate",
"PodcastGenerateRequest", "PodcastGenerateRequest",

View file

@ -1,11 +1,14 @@
from datetime import datetime from datetime import datetime
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
from typing import Generic, TypeVar
from app.db import DocumentType from app.db import DocumentType
from .chunks import ChunkRead from .chunks import ChunkRead
T = TypeVar("T")
class ExtensionDocumentMetadata(BaseModel): class ExtensionDocumentMetadata(BaseModel):
BrowsingSessionId: str BrowsingSessionId: str
@ -53,3 +56,8 @@ class DocumentWithChunksRead(DocumentRead):
chunks: list[ChunkRead] = [] chunks: list[ChunkRead] = []
model_config = ConfigDict(from_attributes=True) model_config = ConfigDict(from_attributes=True)
class PaginatedResponse(BaseModel, Generic[T]):
items: list[T]
total: int

View file

@ -2,7 +2,7 @@
import { motion } from "motion/react"; import { motion } from "motion/react";
import { useParams } from "next/navigation"; import { useParams } from "next/navigation";
import { useEffect, useId, useMemo, useState } from "react"; import { useCallback, useEffect, useId, useMemo, useState } from "react";
import { toast } from "sonner"; import { toast } from "sonner";
import { useDocuments } from "@/hooks/use-documents"; import { useDocuments } from "@/hooks/use-documents";
@ -26,10 +26,6 @@ export default function DocumentsTable() {
const params = useParams(); const params = useParams();
const searchSpaceId = Number(params.search_space_id); const searchSpaceId = Number(params.search_space_id);
const { documents, loading, error, refreshDocuments, deleteDocument } =
useDocuments(searchSpaceId);
const [data, setData] = useState<Document[]>([]);
const [search, setSearch] = useState(""); const [search, setSearch] = useState("");
const debouncedSearch = useDebounced(search, 250); const debouncedSearch = useDebounced(search, 250);
const [activeTypes, setActiveTypes] = useState<string[]>([]); const [activeTypes, setActiveTypes] = useState<string[]>([]);
@ -45,26 +41,48 @@ export default function DocumentsTable() {
const [sortDesc, setSortDesc] = useState(false); const [sortDesc, setSortDesc] = useState(false);
const [selectedIds, setSelectedIds] = useState<Set<number>>(new Set()); const [selectedIds, setSelectedIds] = useState<Set<number>>(new Set());
useEffect(() => { // Use server-side pagination and search
if (documents) setData(documents as Document[]); const {
}, [documents]); documents,
total,
loading,
error,
fetchDocuments,
searchDocuments,
deleteDocument,
} = useDocuments(searchSpaceId, {
page: pageIndex,
pageSize: pageSize,
});
const filtered = useMemo(() => { // Refetch when pagination changes or when search/filters change
let result = data; useEffect(() => {
if (debouncedSearch.trim()) { if (searchSpaceId) {
const q = debouncedSearch.toLowerCase(); if (debouncedSearch.trim()) {
result = result.filter((d) => d.title.toLowerCase().includes(q)); // Use search endpoint if there's a search query
searchDocuments?.(debouncedSearch, pageIndex, pageSize);
} else {
// Use regular fetch if no search
fetchDocuments?.(pageIndex, pageSize);
}
} }
}, [pageIndex, pageSize, debouncedSearch, searchSpaceId, fetchDocuments, searchDocuments]);
// Client-side filtering for document types only
// Note: This could also be moved to the backend for better performance
const filtered = useMemo(() => {
let result = documents || [];
if (activeTypes.length > 0) { if (activeTypes.length > 0) {
result = result.filter((d) => activeTypes.includes(d.document_type)); result = result.filter((d) => activeTypes.includes(d.document_type));
} }
return result; return result;
}, [data, debouncedSearch, activeTypes]); }, [documents, activeTypes]);
const total = filtered.length; // Display filtered results
const displayDocs = filtered;
const displayTotal = activeTypes.length > 0 ? filtered.length : total;
const pageStart = pageIndex * pageSize; const pageStart = pageIndex * pageSize;
const pageEnd = Math.min(pageStart + pageSize, total); const pageEnd = Math.min(pageStart + pageSize, displayTotal);
const pageDocs = filtered.slice(pageStart, pageEnd);
const onToggleType = (type: string, checked: boolean) => { const onToggleType = (type: string, checked: boolean) => {
setActiveTypes((prev) => (checked ? [...prev, type] : prev.filter((t) => t !== type))); setActiveTypes((prev) => (checked ? [...prev, type] : prev.filter((t) => t !== type)));
@ -75,6 +93,14 @@ export default function DocumentsTable() {
setColumnVisibility((prev) => ({ ...prev, [id]: checked })); setColumnVisibility((prev) => ({ ...prev, [id]: checked }));
}; };
const refreshCurrentView = useCallback(async () => {
if (debouncedSearch.trim()) {
await searchDocuments?.(debouncedSearch, pageIndex, pageSize);
} else {
await fetchDocuments?.(pageIndex, pageSize);
}
}, [debouncedSearch, pageIndex, pageSize, searchDocuments, fetchDocuments]);
const onBulkDelete = async () => { const onBulkDelete = async () => {
if (selectedIds.size === 0) { if (selectedIds.size === 0) {
toast.error("No rows selected"); toast.error("No rows selected");
@ -86,7 +112,8 @@ export default function DocumentsTable() {
if (okCount === selectedIds.size) if (okCount === selectedIds.size)
toast.success(`Successfully deleted ${okCount} document(s)`); toast.success(`Successfully deleted ${okCount} document(s)`);
else toast.error("Some documents could not be deleted"); else toast.error("Some documents could not be deleted");
await refreshDocuments?.(); // Refetch the current page with appropriate method
await refreshCurrentView();
setSelectedIds(new Set()); setSelectedIds(new Set());
} catch (e) { } catch (e) {
console.error(e); console.error(e);
@ -113,8 +140,8 @@ export default function DocumentsTable() {
className="w-full px-6 py-4" className="w-full px-6 py-4"
> >
<DocumentsFilters <DocumentsFilters
allDocuments={data} allDocuments={documents || []}
visibleDocuments={pageDocs} visibleDocuments={displayDocs}
selectedIds={selectedIds} selectedIds={selectedIds}
onSearch={setSearch} onSearch={setSearch}
searchValue={search} searchValue={search}
@ -126,12 +153,10 @@ export default function DocumentsTable() {
/> />
<DocumentsTableShell <DocumentsTableShell
documents={pageDocs} documents={displayDocs}
loading={!!loading} loading={!!loading}
error={!!error} error={!!error}
onRefresh={async () => { onRefresh={refreshCurrentView}
await (refreshDocuments?.() ?? Promise.resolve());
}}
selectedIds={selectedIds} selectedIds={selectedIds}
setSelectedIds={setSelectedIds} setSelectedIds={setSelectedIds}
columnVisibility={columnVisibility} columnVisibility={columnVisibility}
@ -147,20 +172,22 @@ export default function DocumentsTable() {
}} }}
/> />
export { DocumentsTable };
<PaginationControls <PaginationControls
pageIndex={pageIndex} pageIndex={pageIndex}
pageSize={pageSize} pageSize={pageSize}
total={total} total={displayTotal}
onPageSizeChange={(s) => { onPageSizeChange={(s) => {
setPageSize(s); setPageSize(s);
setPageIndex(0); setPageIndex(0);
}} }}
onFirst={() => setPageIndex(0)} onFirst={() => setPageIndex(0)}
onPrev={() => setPageIndex((i) => Math.max(0, i - 1))} onPrev={() => setPageIndex((i) => Math.max(0, i - 1))}
onNext={() => setPageIndex((i) => (pageEnd < total ? i + 1 : i))} onNext={() => setPageIndex((i) => (pageEnd < displayTotal ? i + 1 : i))}
onLast={() => setPageIndex(Math.max(0, Math.ceil(total / pageSize) - 1))} onLast={() => setPageIndex(Math.max(0, Math.ceil(displayTotal / pageSize) - 1))}
canPrev={pageIndex > 0} canPrev={pageIndex > 0}
canNext={pageEnd < total} canNext={pageEnd < displayTotal}
id={id} id={id}
/> />
</motion.div> </motion.div>

View file

@ -42,7 +42,10 @@ const DocumentSelector = React.memo(
const { documents, loading, isLoaded, fetchDocuments } = useDocuments( const { documents, loading, isLoaded, fetchDocuments } = useDocuments(
Number(search_space_id), Number(search_space_id),
true {
lazy: true,
pageSize: -1, // Fetch all documents with large page size
}
); );
const handleOpenChange = useCallback( const handleOpenChange = useCallback(

View file

@ -1,6 +1,7 @@
"use client"; "use client";
import { useCallback, useEffect, useState } from "react"; import { useCallback, useEffect, useState } from "react";
import { toast } from "sonner"; import { toast } from "sonner";
import { normalizeListResponse } from "@/lib/pagination";
export interface Document { export interface Document {
id: number; id: number;
@ -30,43 +31,79 @@ export type DocumentType =
| "AIRTABLE_CONNECTOR" | "AIRTABLE_CONNECTOR"
| "LUMA_CONNECTOR"; | "LUMA_CONNECTOR";
export function useDocuments(searchSpaceId: number, lazy: boolean = false) { export interface UseDocumentsOptions {
page?: number;
pageSize?: number;
lazy?: boolean;
}
export function useDocuments(
searchSpaceId: number,
options?: UseDocumentsOptions | boolean
) {
// Support both old boolean API and new options API for backward compatibility
const opts = typeof options === "boolean" ? { lazy: options } : options || {};
const { page, pageSize = 300, lazy = false } = opts;
const [documents, setDocuments] = useState<Document[]>([]); const [documents, setDocuments] = useState<Document[]>([]);
const [total, setTotal] = useState(0);
const [loading, setLoading] = useState(!lazy); // Don't show loading initially for lazy mode const [loading, setLoading] = useState(!lazy); // Don't show loading initially for lazy mode
const [error, setError] = useState<string | null>(null); const [error, setError] = useState<string | null>(null);
const [isLoaded, setIsLoaded] = useState(false); // Memoization flag const [isLoaded, setIsLoaded] = useState(false); // Memoization flag
const fetchDocuments = useCallback(async () => { const fetchDocuments = useCallback(
if (isLoaded && lazy) return; // Avoid redundant calls in lazy mode async (fetchPage?: number, fetchPageSize?: number) => {
if (isLoaded && lazy) return; // Avoid redundant calls in lazy mode
try { try {
setLoading(true); setLoading(true);
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents?search_space_id=${searchSpaceId}`, // Build query params
{ const params = new URLSearchParams({
headers: { search_space_id: searchSpaceId.toString(),
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`, });
},
method: "GET", // Use passed parameters or fall back to state/options
const effectivePage = fetchPage !== undefined ? fetchPage : page;
const effectivePageSize = fetchPageSize !== undefined ? fetchPageSize : pageSize;
if (effectivePage !== undefined) {
params.append("page", effectivePage.toString());
}
if (effectivePageSize !== undefined) {
params.append("page_size", effectivePageSize.toString());
} }
);
if (!response.ok) { const response = await fetch(
toast.error("Failed to fetch documents"); `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents?${params.toString()}`,
throw new Error("Failed to fetch documents"); {
headers: {
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
},
method: "GET",
}
);
if (!response.ok) {
toast.error("Failed to fetch documents");
throw new Error("Failed to fetch documents");
}
const data = await response.json();
const normalized = normalizeListResponse<Document>(data);
setDocuments(normalized.items);
setTotal(normalized.total);
setError(null);
setIsLoaded(true);
} catch (err: any) {
setError(err.message || "Failed to fetch documents");
console.error("Error fetching documents:", err);
} finally {
setLoading(false);
} }
},
const data = await response.json(); [searchSpaceId, page, pageSize, isLoaded, lazy]
setDocuments(data); );
setError(null);
setIsLoaded(true);
} catch (err: any) {
setError(err.message || "Failed to fetch documents");
console.error("Error fetching documents:", err);
} finally {
setLoading(false);
}
}, [searchSpaceId, isLoaded, lazy]);
useEffect(() => { useEffect(() => {
if (!lazy && searchSpaceId) { if (!lazy && searchSpaceId) {
@ -80,6 +117,64 @@ export function useDocuments(searchSpaceId: number, lazy: boolean = false) {
await fetchDocuments(); await fetchDocuments();
}, [fetchDocuments]); }, [fetchDocuments]);
// Function to search documents by title
const searchDocuments = useCallback(
async (searchQuery: string, fetchPage?: number, fetchPageSize?: number) => {
if (!searchQuery.trim()) {
// If search is empty, fetch all documents
return fetchDocuments(fetchPage, fetchPageSize);
}
try {
setLoading(true);
// Build query params
const params = new URLSearchParams({
search_space_id: searchSpaceId.toString(),
title: searchQuery,
});
// Use passed parameters or fall back to state/options
const effectivePage = fetchPage !== undefined ? fetchPage : page;
const effectivePageSize = fetchPageSize !== undefined ? fetchPageSize : pageSize;
if (effectivePage !== undefined) {
params.append("page", effectivePage.toString());
}
if (effectivePageSize !== undefined) {
params.append("page_size", effectivePageSize.toString());
}
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents/search/?${params.toString()}`,
{
headers: {
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
},
method: "GET",
}
);
if (!response.ok) {
toast.error("Failed to search documents");
throw new Error("Failed to search documents");
}
const data = await response.json();
const normalized = normalizeListResponse<Document>(data);
setDocuments(normalized.items);
setTotal(normalized.total);
setError(null);
} catch (err: any) {
setError(err.message || "Failed to search documents");
console.error("Error searching documents:", err);
} finally {
setLoading(false);
}
},
[searchSpaceId, page, pageSize, fetchDocuments]
);
// Function to delete a document // Function to delete a document
const deleteDocument = useCallback( const deleteDocument = useCallback(
async (documentId: number) => { async (documentId: number) => {
@ -114,10 +209,12 @@ export function useDocuments(searchSpaceId: number, lazy: boolean = false) {
return { return {
documents, documents,
total,
loading, loading,
error, error,
isLoaded, isLoaded,
fetchDocuments, // Manual fetch function for lazy mode fetchDocuments, // Manual fetch function for lazy mode
searchDocuments, // Search function
refreshDocuments, refreshDocuments,
deleteDocument, deleteDocument,
}; };

View file

@ -0,0 +1,34 @@
// Helper to normalize list responses from the API
// Supports shapes: Array<T>, { items: T[]; total: number }, and tuple [T[], total]
export type ListResponse<T> = {
items: T[];
total: number;
};
export function normalizeListResponse<T>(payload: any): ListResponse<T> {
try {
// Case 1: already in desired shape
if (payload && Array.isArray(payload.items)) {
const total = typeof payload.total === "number" ? payload.total : payload.items.length;
return { items: payload.items as T[], total };
}
// Case 2: tuple [items, total]
if (Array.isArray(payload) && payload.length === 2 && Array.isArray(payload[0])) {
const items = (payload[0] ?? []) as T[];
const rawTotal = payload[1];
const total = typeof rawTotal === "number" ? rawTotal : items.length;
return { items, total };
}
// Case 3: plain array
if (Array.isArray(payload)) {
return { items: payload as T[], total: (payload as T[]).length };
}
} catch (e) {
// fallthrough to default
}
return { items: [], total: 0 };
}