Implemented server-side pagination.

Enabled the search space file management panel to use server-side pagination.
This commit is contained in:
Natsume Ryuhane 2025-10-01 13:05:22 -07:00
parent 54326be56c
commit 797fe26f53
No known key found for this signature in database
7 changed files with 374 additions and 62 deletions

View file

@ -22,6 +22,7 @@ from app.schemas import (
DocumentsCreate,
DocumentUpdate,
DocumentWithChunksRead,
PaginatedResponse,
)
from app.services.task_logging_service import TaskLoggingService
from app.tasks.document_processors import (
@ -154,15 +155,36 @@ async def create_documents_file_upload(
) from e
@router.get("/documents/", response_model=list[DocumentRead])
@router.get("/documents/", response_model=PaginatedResponse[DocumentRead])
async def read_documents(
skip: int = 0,
limit: int = 3000,
skip: int | None = None,
page: int | None = None,
page_size: int = 50,
search_space_id: int | None = None,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
List documents owned by the current user, with optional filtering and pagination.
Args:
skip: Absolute number of items to skip from the beginning. If provided, it takes precedence over 'page'.
page: Zero-based page index used when 'skip' is not provided.
page_size: Number of items per page (default: 50). Use -1 to return all remaining items after the offset.
search_space_id: If provided, restrict results to a specific search space.
session: Database session (injected).
user: Current authenticated user (injected).
Returns:
PaginatedResponse[DocumentRead]: Paginated list of documents visible to the user.
Notes:
- If both 'skip' and 'page' are provided, 'skip' is used.
- Results are scoped to documents owned by the current user.
"""
try:
from sqlalchemy import func
query = (
select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id)
)
@ -171,7 +193,33 @@ async def read_documents(
if search_space_id is not None:
query = query.filter(Document.search_space_id == search_space_id)
result = await session.execute(query.offset(skip).limit(limit))
# Get total count
count_query = (
select(func.count())
.select_from(Document)
.join(SearchSpace)
.filter(SearchSpace.user_id == user.id)
)
if search_space_id is not None:
count_query = count_query.filter(
Document.search_space_id == search_space_id
)
total_result = await session.execute(count_query)
total = total_result.scalar() or 0
# Calculate offset
offset = 0
if skip is not None:
offset = skip
elif page is not None:
offset = page * page_size
# Get paginated results
if page_size == -1:
result = await session.execute(query.offset(offset))
else:
result = await session.execute(query.offset(offset).limit(page_size))
db_documents = result.scalars().all()
# Convert database objects to API-friendly format
@ -189,13 +237,106 @@ async def read_documents(
)
)
return api_documents
return PaginatedResponse(items=api_documents, total=total)
except Exception as e:
raise HTTPException(
status_code=500, detail=f"Failed to fetch documents: {e!s}"
) from e
@router.get("/documents/search/", response_model=PaginatedResponse[DocumentRead])
async def search_documents(
    title: str,
    skip: int | None = None,
    page: int | None = None,
    page_size: int = 50,
    search_space_id: int | None = None,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Search documents by title substring, optionally filtered by search_space_id.

    Args:
        title: Case-insensitive substring to match against document titles. Required.
            The value is matched literally: '%', '_' and '\\' are escaped before
            being embedded in the ILIKE pattern.
        skip: Absolute number of items to skip from the beginning. If provided, it
            takes precedence over 'page'. Must be >= 0. Default: None.
        page: Zero-based page index used when 'skip' is not provided. Must be >= 0.
            Ignored when page_size is -1 (everything is a single page). Default: None.
        page_size: Number of items per page. Use -1 to return all remaining items
            after the offset. Default: 50.
        search_space_id: Filter results to a specific search space. Default: None.
        session: Database session (injected).
        user: Current authenticated user (injected).

    Returns:
        PaginatedResponse[DocumentRead]: One page of matching documents plus the
        total number of matches (computed without offset/limit).

    Raises:
        HTTPException: 400 for negative 'skip'/'page' or 'page_size' below -1;
            500 if the query itself fails.

    Notes:
        - Title matching uses ILIKE (case-insensitive).
        - If both 'skip' and 'page' are provided, 'skip' is used.
        - Results are scoped to documents in search spaces owned by the current user.
    """
    # Validate outside the try block: the blanket `except Exception` below would
    # otherwise convert these client errors into 500 responses.
    if page_size < -1:
        raise HTTPException(
            status_code=400,
            detail="page_size must be -1 (no limit) or a non-negative integer",
        )
    if skip is not None and skip < 0:
        raise HTTPException(status_code=400, detail="skip must be non-negative")
    if page is not None and page < 0:
        raise HTTPException(status_code=400, detail="page must be non-negative")

    try:
        from sqlalchemy import func

        # Escape LIKE metacharacters so user input is matched as a literal substring.
        escaped_title = (
            title.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )

        # Build the filter set once so the data query and the count query cannot drift.
        filters = [
            SearchSpace.user_id == user.id,
            Document.title.ilike(f"%{escaped_title}%", escape="\\"),
        ]
        if search_space_id is not None:
            filters.append(Document.search_space_id == search_space_id)

        # Total number of matches, independent of offset/limit.
        count_query = (
            select(func.count())
            .select_from(Document)
            .join(SearchSpace)
            .filter(*filters)
        )
        total_result = await session.execute(count_query)
        total = total_result.scalar() or 0

        # 'skip' wins over 'page'. With page_size == -1 there is only one page, so a
        # page-based offset would be meaningless (and negative: page * -1).
        if skip is not None:
            offset = skip
        elif page is not None and page_size > 0:
            offset = page * page_size
        else:
            offset = 0

        # A deterministic order is required for OFFSET/LIMIT pages to be stable.
        query = (
            select(Document)
            .join(SearchSpace)
            .filter(*filters)
            .order_by(Document.id)
            .offset(offset)
        )
        if page_size != -1:
            query = query.limit(page_size)

        result = await session.execute(query)
        db_documents = result.scalars().all()

        # Convert database objects to API-friendly format
        api_documents = [
            DocumentRead(
                id=doc.id,
                title=doc.title,
                document_type=doc.document_type,
                document_metadata=doc.document_metadata,
                content=doc.content,
                created_at=doc.created_at,
                search_space_id=doc.search_space_id,
            )
            for doc in db_documents
        ]

        return PaginatedResponse(items=api_documents, total=total)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to search documents: {e!s}"
        ) from e
@router.get("/documents/{document_id}", response_model=DocumentRead)
async def read_document(
document_id: int,

View file

@ -16,6 +16,7 @@ from .documents import (
DocumentWithChunksRead,
ExtensionDocumentContent,
ExtensionDocumentMetadata,
PaginatedResponse,
)
from .llm_config import LLMConfigBase, LLMConfigCreate, LLMConfigRead, LLMConfigUpdate
from .logs import LogBase, LogCreate, LogFilter, LogRead, LogUpdate
@ -68,6 +69,7 @@ __all__ = [
"LogFilter",
"LogRead",
"LogUpdate",
"PaginatedResponse",
"PodcastBase",
"PodcastCreate",
"PodcastGenerateRequest",

View file

@ -1,11 +1,14 @@
from datetime import datetime
from pydantic import BaseModel, ConfigDict
from typing import Generic, TypeVar
from app.db import DocumentType
from .chunks import ChunkRead
T = TypeVar("T")
class ExtensionDocumentMetadata(BaseModel):
BrowsingSessionId: str
@ -53,3 +56,8 @@ class DocumentWithChunksRead(DocumentRead):
chunks: list[ChunkRead] = []
model_config = ConfigDict(from_attributes=True)
class PaginatedResponse(BaseModel, Generic[T]):
    """Generic response envelope: one page of results plus the overall match count."""

    # Records on the requested page; the element type is the model's parameter T.
    items: list[T]
    # Number of records matching the query. The document list/search routes fill
    # this from a separate COUNT query that ignores offset and limit.
    total: int

View file

@ -2,7 +2,7 @@
import { motion } from "framer-motion";
import { useParams } from "next/navigation";
import { useEffect, useId, useMemo, useState } from "react";
import { useCallback, useEffect, useId, useMemo, useState } from "react";
import { toast } from "sonner";
import { useDocuments } from "@/hooks/use-documents";
@ -26,10 +26,6 @@ export default function DocumentsTable() {
const params = useParams();
const searchSpaceId = Number(params.search_space_id);
const { documents, loading, error, refreshDocuments, deleteDocument } =
useDocuments(searchSpaceId);
const [data, setData] = useState<Document[]>([]);
const [search, setSearch] = useState("");
const debouncedSearch = useDebounced(search, 250);
const [activeTypes, setActiveTypes] = useState<string[]>([]);
@ -45,26 +41,48 @@ export default function DocumentsTable() {
const [sortDesc, setSortDesc] = useState(false);
const [selectedIds, setSelectedIds] = useState<Set<number>>(new Set());
useEffect(() => {
if (documents) setData(documents as Document[]);
}, [documents]);
// Use server-side pagination and search
const {
documents,
total,
loading,
error,
fetchDocuments,
searchDocuments,
deleteDocument,
} = useDocuments(searchSpaceId, {
page: pageIndex,
pageSize: pageSize,
});
const filtered = useMemo(() => {
let result = data;
if (debouncedSearch.trim()) {
const q = debouncedSearch.toLowerCase();
result = result.filter((d) => d.title.toLowerCase().includes(q));
// Reload from the server whenever the page, page size, debounced search text,
// or search space changes. A non-blank search goes to the search endpoint;
// otherwise the plain listing endpoint is used.
useEffect(() => {
  if (!searchSpaceId) return;

  const hasQuery = debouncedSearch.trim().length > 0;
  if (hasQuery) {
    searchDocuments?.(debouncedSearch, pageIndex, pageSize);
    return;
  }
  fetchDocuments?.(pageIndex, pageSize);
}, [pageIndex, pageSize, debouncedSearch, searchSpaceId, fetchDocuments, searchDocuments]);
// Client-side filtering for document types only
// Note: This could also be moved to the backend for better performance
const filtered = useMemo(() => {
let result = documents || [];
if (activeTypes.length > 0) {
result = result.filter((d) => activeTypes.includes(d.document_type));
}
return result;
}, [data, debouncedSearch, activeTypes]);
}, [documents, activeTypes]);
const total = filtered.length;
// Display filtered results
const displayDocs = filtered;
const displayTotal = activeTypes.length > 0 ? filtered.length : total;
const pageStart = pageIndex * pageSize;
const pageEnd = Math.min(pageStart + pageSize, total);
const pageDocs = filtered.slice(pageStart, pageEnd);
const pageEnd = Math.min(pageStart + pageSize, displayTotal);
const onToggleType = (type: string, checked: boolean) => {
setActiveTypes((prev) => (checked ? [...prev, type] : prev.filter((t) => t !== type)));
@ -75,6 +93,14 @@ export default function DocumentsTable() {
setColumnVisibility((prev) => ({ ...prev, [id]: checked }));
};
// Re-requests the page currently on screen, going through the search endpoint
// when a non-blank search term is active and the plain listing otherwise.
const refreshCurrentView = useCallback(async () => {
  const hasQuery = debouncedSearch.trim().length > 0;
  await (hasQuery
    ? searchDocuments?.(debouncedSearch, pageIndex, pageSize)
    : fetchDocuments?.(pageIndex, pageSize));
}, [debouncedSearch, pageIndex, pageSize, searchDocuments, fetchDocuments]);
const onBulkDelete = async () => {
if (selectedIds.size === 0) {
toast.error("No rows selected");
@ -86,7 +112,8 @@ export default function DocumentsTable() {
if (okCount === selectedIds.size)
toast.success(`Successfully deleted ${okCount} document(s)`);
else toast.error("Some documents could not be deleted");
await refreshDocuments?.();
// Refetch the current page with appropriate method
await refreshCurrentView();
setSelectedIds(new Set());
} catch (e) {
console.error(e);
@ -113,8 +140,8 @@ export default function DocumentsTable() {
className="w-full px-6 py-4"
>
<DocumentsFilters
allDocuments={data}
visibleDocuments={pageDocs}
allDocuments={documents || []}
visibleDocuments={displayDocs}
selectedIds={selectedIds}
onSearch={setSearch}
searchValue={search}
@ -126,12 +153,10 @@ export default function DocumentsTable() {
/>
<DocumentsTableShell
documents={pageDocs}
documents={displayDocs}
loading={!!loading}
error={!!error}
onRefresh={async () => {
await (refreshDocuments?.() ?? Promise.resolve());
}}
onRefresh={refreshCurrentView}
selectedIds={selectedIds}
setSelectedIds={setSelectedIds}
columnVisibility={columnVisibility}
@ -147,20 +172,22 @@ export default function DocumentsTable() {
}}
/>
export { DocumentsTable };
<PaginationControls
pageIndex={pageIndex}
pageSize={pageSize}
total={total}
total={displayTotal}
onPageSizeChange={(s) => {
setPageSize(s);
setPageIndex(0);
}}
onFirst={() => setPageIndex(0)}
onPrev={() => setPageIndex((i) => Math.max(0, i - 1))}
onNext={() => setPageIndex((i) => (pageEnd < total ? i + 1 : i))}
onLast={() => setPageIndex(Math.max(0, Math.ceil(total / pageSize) - 1))}
onNext={() => setPageIndex((i) => (pageEnd < displayTotal ? i + 1 : i))}
onLast={() => setPageIndex(Math.max(0, Math.ceil(displayTotal / pageSize) - 1))}
canPrev={pageIndex > 0}
canNext={pageEnd < total}
canNext={pageEnd < displayTotal}
id={id}
/>
</motion.div>

View file

@ -42,7 +42,10 @@ const DocumentSelector = React.memo(
const { documents, loading, isLoaded, fetchDocuments } = useDocuments(
Number(search_space_id),
true
{
lazy: true,
pageSize: -1, // -1 tells the backend to apply no page limit, i.e. return every document
}
);
const handleOpenChange = useCallback(

View file

@ -1,6 +1,7 @@
"use client";
import { useCallback, useEffect, useState } from "react";
import { toast } from "sonner";
import { normalizeListResponse } from "@/lib/pagination";
export interface Document {
id: number;
@ -29,43 +30,79 @@ export type DocumentType =
| "GOOGLE_GMAIL_CONNECTOR"
| "AIRTABLE_CONNECTOR";
export function useDocuments(searchSpaceId: number, lazy: boolean = false) {
/** Options accepted as the second argument of `useDocuments`. */
export interface UseDocumentsOptions {
  /** Zero-based page index sent to the API as `page`; left out of the request when undefined. */
  page?: number;
  /** Items per page sent to the API as `page_size` (the hook defaults to 300); -1 requests all documents. */
  pageSize?: number;
  /**
   * When true, the hook starts with `loading` set to false, its automatic fetch
   * effect is guarded off, and `fetchDocuments` returns early once data has loaded.
   */
  lazy?: boolean;
}
export function useDocuments(
searchSpaceId: number,
options?: UseDocumentsOptions | boolean
) {
// Support both old boolean API and new options API for backward compatibility
const opts = typeof options === "boolean" ? { lazy: options } : options || {};
const { page, pageSize = 300, lazy = false } = opts;
const [documents, setDocuments] = useState<Document[]>([]);
const [total, setTotal] = useState(0);
const [loading, setLoading] = useState(!lazy); // Don't show loading initially for lazy mode
const [error, setError] = useState<string | null>(null);
const [isLoaded, setIsLoaded] = useState(false); // Memoization flag
const fetchDocuments = useCallback(async () => {
if (isLoaded && lazy) return; // Avoid redundant calls in lazy mode
const fetchDocuments = useCallback(
async (fetchPage?: number, fetchPageSize?: number) => {
if (isLoaded && lazy) return; // Avoid redundant calls in lazy mode
try {
setLoading(true);
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents?search_space_id=${searchSpaceId}`,
{
headers: {
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
},
method: "GET",
try {
setLoading(true);
// Build query params
const params = new URLSearchParams({
search_space_id: searchSpaceId.toString(),
});
// Use passed parameters or fall back to state/options
const effectivePage = fetchPage !== undefined ? fetchPage : page;
const effectivePageSize = fetchPageSize !== undefined ? fetchPageSize : pageSize;
if (effectivePage !== undefined) {
params.append("page", effectivePage.toString());
}
if (effectivePageSize !== undefined) {
params.append("page_size", effectivePageSize.toString());
}
);
if (!response.ok) {
toast.error("Failed to fetch documents");
throw new Error("Failed to fetch documents");
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents?${params.toString()}`,
{
headers: {
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
},
method: "GET",
}
);
if (!response.ok) {
toast.error("Failed to fetch documents");
throw new Error("Failed to fetch documents");
}
const data = await response.json();
const normalized = normalizeListResponse<Document>(data);
setDocuments(normalized.items);
setTotal(normalized.total);
setError(null);
setIsLoaded(true);
} catch (err: any) {
setError(err.message || "Failed to fetch documents");
console.error("Error fetching documents:", err);
} finally {
setLoading(false);
}
const data = await response.json();
setDocuments(data);
setError(null);
setIsLoaded(true);
} catch (err: any) {
setError(err.message || "Failed to fetch documents");
console.error("Error fetching documents:", err);
} finally {
setLoading(false);
}
}, [searchSpaceId, isLoaded, lazy]);
},
[searchSpaceId, page, pageSize, isLoaded, lazy]
);
useEffect(() => {
if (!lazy && searchSpaceId) {
@ -79,6 +116,64 @@ export function useDocuments(searchSpaceId: number, lazy: boolean = false) {
await fetchDocuments();
}, [fetchDocuments]);
// Searches documents by title via the /documents/search/ endpoint and stores
// the resulting page and total in hook state.
//
// - searchQuery: title substring; a blank value delegates to fetchDocuments.
// - fetchPage / fetchPageSize: per-call overrides; when omitted, the hook's
//   `page` / `pageSize` options are used.
//
// Failures are recorded in `error` (and logged) rather than thrown.
const searchDocuments = useCallback(
  async (searchQuery: string, fetchPage?: number, fetchPageSize?: number) => {
    if (!searchQuery.trim()) {
      // Nothing to search for: fall back to the regular listing.
      return fetchDocuments(fetchPage, fetchPageSize);
    }

    try {
      setLoading(true);

      const params = new URLSearchParams({
        search_space_id: searchSpaceId.toString(),
        title: searchQuery,
      });

      // Explicit arguments win over the options captured by the hook.
      const effectivePage = fetchPage ?? page;
      const effectivePageSize = fetchPageSize ?? pageSize;

      // `page` is optional; the backend applies offset 0 when it is absent.
      if (effectivePage !== undefined) {
        params.append("page", effectivePage.toString());
      }
      // `pageSize` always has a value because the hook supplies a default.
      params.append("page_size", effectivePageSize.toString());

      const response = await fetch(
        `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents/search/?${params.toString()}`,
        {
          headers: {
            Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
          },
          method: "GET",
        }
      );

      if (!response.ok) {
        toast.error("Failed to search documents");
        throw new Error("Failed to search documents");
      }

      const data = await response.json();
      const normalized = normalizeListResponse<Document>(data);
      setDocuments(normalized.items);
      setTotal(normalized.total);
      setError(null);
    } catch (err: unknown) {
      // Narrow before reading `.message`; anything else gets the generic text.
      const message =
        err instanceof Error && err.message ? err.message : "Failed to search documents";
      setError(message);
      console.error("Error searching documents:", err);
    } finally {
      setLoading(false);
    }
  },
  [searchSpaceId, page, pageSize, fetchDocuments]
);
// Function to delete a document
const deleteDocument = useCallback(
async (documentId: number) => {
@ -113,10 +208,12 @@ export function useDocuments(searchSpaceId: number, lazy: boolean = false) {
return {
documents,
total,
loading,
error,
isLoaded,
fetchDocuments, // Manual fetch function for lazy mode
searchDocuments, // Search function
refreshDocuments,
deleteDocument,
};

View file

@ -0,0 +1,34 @@
// Helper to normalize list responses from the API.
// Supports shapes: Array<T>, { items: T[]; total: number }, and tuple [T[], total].

/** Canonical list shape consumed by the client: one page of items plus the overall count. */
export type ListResponse<T> = {
  items: T[];
  total: number;
};

/**
 * Returns `rawTotal` when it is a usable count (a non-negative integer),
 * otherwise `itemCount`. This keeps NaN, Infinity, negative or fractional
 * totals out of downstream page arithmetic.
 */
function resolveTotal(rawTotal: unknown, itemCount: number): number {
  return typeof rawTotal === "number" && Number.isInteger(rawTotal) && rawTotal >= 0
    ? rawTotal
    : itemCount;
}

/**
 * Coerces an API list payload into `{ items, total }`.
 *
 * Recognised shapes, checked in this order:
 * 1. `{ items: T[], total?: number }`: `total` falls back to `items.length`
 *    when it is missing or not a usable count.
 * 2. `[T[], total]`: a two-element tuple whose first element is an array.
 * 3. `T[]`: a plain array; `total` is its length.
 *
 * Elements are not validated; `T` is the caller's assertion about their shape.
 *
 * @param payload Parsed response body of unknown shape.
 * @returns The normalized list, or `{ items: [], total: 0 }` for anything unrecognised.
 */
export function normalizeListResponse<T>(payload: unknown): ListResponse<T> {
  try {
    // Case 1: already in the desired object shape.
    if (typeof payload === "object" && payload !== null) {
      const { items, total } = payload as { items?: unknown; total?: unknown };
      if (Array.isArray(items)) {
        return { items: items as T[], total: resolveTotal(total, items.length) };
      }
    }

    if (Array.isArray(payload)) {
      // Case 2: tuple [items, total].
      if (payload.length === 2 && Array.isArray(payload[0])) {
        const items = payload[0] as T[];
        return { items, total: resolveTotal(payload[1], items.length) };
      }

      // Case 3: plain array.
      return { items: payload as T[], total: payload.length };
    }
  } catch {
    // Deliberately best-effort: an exotic payload (e.g. a throwing accessor)
    // is treated like any other unrecognised shape.
  }
  return { items: [], total: 0 };
}