feat: add document status management with JSONB column for processing states in documents

This commit is contained in:
Anish Sarkar 2026-02-05 21:59:31 +05:30
parent 04884caeef
commit aef59d04eb
13 changed files with 526 additions and 135 deletions

View file

@ -0,0 +1,80 @@
"""Add status column to documents table for per-document processing status
Revision ID: 92
Revises: 91
Create Date: 2026-02-05
Changes:
1. Add status column (JSONB) to documents table
2. Default value is {"state": "ready"} for backward compatibility
3. Existing documents are set to ready status
4. Index created for efficient status filtering
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "92"
down_revision: str | None = "91"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Add the ``status`` JSONB column to ``documents``.

    New rows default to ``{"state": "ready"}``, and existing rows pick up
    the same default, so pre-migration documents remain searchable.
    """
    # 1. Add status column with default value for new rows.
    #    Wrapped in a DO block with an information_schema check so the
    #    migration is idempotent (safe to re-run if the column exists).
    op.execute(
        """
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM information_schema.columns
                WHERE table_name = 'documents' AND column_name = 'status'
            ) THEN
                ALTER TABLE documents
                ADD COLUMN status JSONB NOT NULL DEFAULT '{"state": "ready"}'::jsonb;
            END IF;
        END$$;
        """
    )
    # 2. Expression index on status->>'state' so filtering documents by
    #    processing state does not require a full-table JSONB scan.
    op.execute(
        """
        CREATE INDEX IF NOT EXISTS ix_documents_status
        ON documents ((status->>'state'));
        """
    )
def downgrade() -> None:
    """Remove the ``status`` column (and its index) from ``documents``."""
    # Drop the expression index first; IF EXISTS keeps this idempotent.
    op.execute(
        """
        DROP INDEX IF EXISTS ix_documents_status;
        """
    )
    # Drop the column, guarded so re-running the downgrade is a no-op.
    op.execute(
        """
        DO $$
        BEGIN
            IF EXISTS (
                SELECT 1 FROM information_schema.columns
                WHERE table_name = 'documents' AND column_name = 'status'
            ) THEN
                ALTER TABLE documents
                DROP COLUMN status;
            END IF;
        END$$;
        """
    )

View file

@ -16,13 +16,14 @@ from sqlalchemy.orm import selectinload
from app.config import config
from app.connectors.composio_connector import ComposioConnector
from app.db import Document, DocumentType
from app.db import Document, DocumentStatus, DocumentType
from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
from app.tasks.connector_indexers.base import (
calculate_date_range,
check_duplicate_document_by_hash,
safe_set_chunks,
)
from app.utils.document_converters import (
create_document_chunks,
@ -266,18 +267,18 @@ async def index_composio_google_calendar(
documents_indexed = 0
documents_skipped = 0
duplicate_content_count = (
0 # Track events skipped due to duplicate content_hash
)
documents_failed = 0 # Track events that failed processing
duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
last_heartbeat_time = time.time()
# =======================================================================
# PHASE 1: Analyze all events, create pending documents
# This makes ALL documents visible in the UI immediately with pending status
# =======================================================================
events_to_process = [] # List of dicts with document and event data
new_documents_created = False
for event in events:
# Send heartbeat periodically to indicate task is still alive
if on_heartbeat_callback:
current_time = time.time()
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time
try:
# Handle both standard Google API and potential Composio variations
event_id = event.get("id", "") or event.get("eventId", "")
@ -315,61 +316,24 @@ async def index_composio_google_calendar(
if existing_document:
if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
existing_document.status = DocumentStatus.ready()
documents_skipped += 1
continue
# Update existing
user_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
if user_llm:
document_metadata = {
"event_id": event_id,
"summary": summary,
"start_time": start_time,
"document_type": "Google Calendar Event (Composio)",
}
(
summary_content,
summary_embedding,
) = await generate_document_summary(
markdown_content, user_llm, document_metadata
)
else:
summary_content = f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
if location:
summary_content += f"\nLocation: {location}"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(markdown_content)
existing_document.title = summary
existing_document.content = summary_content
existing_document.content_hash = content_hash
existing_document.embedding = summary_embedding
existing_document.document_metadata = {
"event_id": event_id,
"summary": summary,
"start_time": start_time,
"end_time": end_time,
"location": location,
"connector_id": connector_id,
"source": "composio",
}
existing_document.chunks = chunks
existing_document.updated_at = get_current_timestamp()
documents_indexed += 1
# Batch commit every 10 documents
if documents_indexed % 10 == 0:
logger.info(
f"Committing batch: {documents_indexed} Google Calendar events processed so far"
)
await session.commit()
# Queue existing document for update (will be set to processing in Phase 2)
events_to_process.append({
'document': existing_document,
'is_new': False,
'markdown_content': markdown_content,
'content_hash': content_hash,
'event_id': event_id,
'summary': summary,
'start_time': start_time,
'end_time': end_time,
'location': location,
})
continue
# Document doesn't exist by unique_identifier_hash
@ -380,46 +344,16 @@ async def index_composio_google_calendar(
)
if duplicate_by_content:
# A document with the same content already exists (likely from standard connector)
logger.info(
f"Event {summary} already indexed by another connector "
f"(existing document ID: {duplicate_by_content.id}, "
f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content."
f"type: {duplicate_by_content.document_type}). Skipping."
)
duplicate_content_count += 1
documents_skipped += 1
continue
# Create new document
user_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
if user_llm:
document_metadata = {
"event_id": event_id,
"summary": summary,
"start_time": start_time,
"document_type": "Google Calendar Event (Composio)",
}
(
summary_content,
summary_embedding,
) = await generate_document_summary(
markdown_content, user_llm, document_metadata
)
else:
summary_content = (
f"Calendar: {summary}\n\nStart: {start_time}\nEnd: {end_time}"
)
if location:
summary_content += f"\nLocation: {location}"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(markdown_content)
# Create new document with PENDING status (visible in UI immediately)
document = Document(
search_space_id=search_space_id,
title=summary,
@ -436,19 +370,107 @@ async def index_composio_google_calendar(
"toolkit_id": "googlecalendar",
"source": "composio",
},
content=summary_content,
content_hash=content_hash,
content="Pending...", # Placeholder until processed
content_hash=unique_identifier_hash, # Temporary unique value - updated when ready
unique_identifier_hash=unique_identifier_hash,
embedding=summary_embedding,
chunks=chunks,
embedding=None,
chunks=[], # Empty at creation - safe for async
status=DocumentStatus.pending(), # Pending until processing starts
updated_at=get_current_timestamp(),
created_by_id=user_id,
connector_id=connector_id,
)
session.add(document)
new_documents_created = True
events_to_process.append({
'document': document,
'is_new': True,
'markdown_content': markdown_content,
'content_hash': content_hash,
'event_id': event_id,
'summary': summary,
'start_time': start_time,
'end_time': end_time,
'location': location,
})
except Exception as e:
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
documents_failed += 1
continue
# Commit all pending documents - they all appear in UI now
if new_documents_created:
logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
await session.commit()
# =======================================================================
# PHASE 2: Process each document one by one
# Each document transitions: pending → processing → ready/failed
# =======================================================================
logger.info(f"Phase 2: Processing {len(events_to_process)} documents")
for item in events_to_process:
# Send heartbeat periodically
if on_heartbeat_callback:
current_time = time.time()
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time
document = item['document']
try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing()
await session.commit()
# Heavy processing (LLM, embeddings, chunks)
user_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
if user_llm:
document_metadata_for_summary = {
"event_id": item['event_id'],
"summary": item['summary'],
"start_time": item['start_time'],
"document_type": "Google Calendar Event (Composio)",
}
summary_content, summary_embedding = await generate_document_summary(
item['markdown_content'], user_llm, document_metadata_for_summary
)
else:
summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}"
if item['location']:
summary_content += f"\nLocation: {item['location']}"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(item['markdown_content'])
# Update document to READY with actual content
document.title = item['summary']
document.content = summary_content
document.content_hash = item['content_hash']
document.embedding = summary_embedding
document.document_metadata = {
"event_id": item['event_id'],
"summary": item['summary'],
"start_time": item['start_time'],
"end_time": item['end_time'],
"location": item['location'],
"connector_id": connector_id,
"source": "composio",
}
safe_set_chunks(document, chunks)
document.updated_at = get_current_timestamp()
document.status = DocumentStatus.ready()
documents_indexed += 1
# Batch commit every 10 documents
# Batch commit every 10 documents (for ready status updates)
if documents_indexed % 10 == 0:
logger.info(
f"Committing batch: {documents_indexed} Google Calendar events processed so far"
@ -457,7 +479,13 @@ async def index_composio_google_calendar(
except Exception as e:
logger.error(f"Error processing Calendar event: {e!s}", exc_info=True)
documents_skipped += 1
# Mark document as failed with reason (visible in UI)
try:
document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp()
except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}")
documents_failed += 1
continue
# CRITICAL: Always update timestamp (even if 0 documents indexed) so Electric SQL syncs
@ -490,10 +518,13 @@ async def index_composio_google_calendar(
else:
raise
# Build warning message if duplicates were found
warning_message = None
# Build warning message if there were issues
warning_parts = []
if duplicate_content_count > 0:
warning_message = f"{duplicate_content_count} skipped (duplicate)"
warning_parts.append(f"{duplicate_content_count} duplicate")
if documents_failed > 0:
warning_parts.append(f"{documents_failed} failed")
warning_message = ", ".join(warning_parts) if warning_parts else None
await task_logger.log_task_success(
log_entry,
@ -501,13 +532,15 @@ async def index_composio_google_calendar(
{
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
"documents_failed": documents_failed,
"duplicate_content_count": duplicate_content_count,
},
)
logger.info(
f"Composio Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped "
f"({duplicate_content_count} due to duplicate content from other connectors)"
f"Composio Google Calendar indexing completed: {documents_indexed} ready, "
f"{documents_skipped} skipped, {documents_failed} failed "
f"({duplicate_content_count} duplicate content)"
)
return documents_indexed, warning_message

View file

@ -100,6 +100,80 @@ class PodcastStatus(str, Enum):
FAILED = "failed"
class DocumentStatus:
"""
Helper class for document processing status (stored as JSONB).
Status values:
- {"state": "ready"} - Document is fully processed and searchable
- {"state": "pending"} - Document is queued, waiting to be processed
- {"state": "processing"} - Document is currently being processed (only 1 at a time)
- {"state": "failed", "reason": "..."} - Processing failed with reason
Usage:
document.status = DocumentStatus.pending()
document.status = DocumentStatus.processing()
document.status = DocumentStatus.ready()
document.status = DocumentStatus.failed("LLM rate limit exceeded")
"""
# State constants
READY = "ready"
PENDING = "pending"
PROCESSING = "processing"
FAILED = "failed"
@staticmethod
def ready() -> dict:
"""Return status dict for a ready/searchable document."""
return {"state": DocumentStatus.READY}
@staticmethod
def pending() -> dict:
"""Return status dict for a document waiting to be processed."""
return {"state": DocumentStatus.PENDING}
@staticmethod
def processing() -> dict:
"""Return status dict for a document being processed."""
return {"state": DocumentStatus.PROCESSING}
@staticmethod
def failed(reason: str, **extra_details) -> dict:
"""
Return status dict for a failed document.
Args:
reason: Human-readable failure reason
**extra_details: Optional additional details (duplicate_of, error_code, etc.)
"""
status = {"state": DocumentStatus.FAILED, "reason": reason[:500]} # Truncate long reasons
if extra_details:
status.update(extra_details)
return status
@staticmethod
def get_state(status: dict | None) -> str | None:
"""Extract state from status dict, returns None if invalid."""
if status is None:
return None
return status.get("state") if isinstance(status, dict) else None
@staticmethod
def is_state(status: dict | None, state: str) -> bool:
"""Check if status matches a given state."""
return DocumentStatus.get_state(status) == state
@staticmethod
def get_failure_reason(status: dict | None) -> str | None:
"""Extract failure reason from status dict."""
if status is None or not isinstance(status, dict):
return None
if status.get("state") == DocumentStatus.FAILED:
return status.get("reason")
return None
class LiteLLMProvider(str, Enum):
"""
Enum for LLM providers supported by LiteLLM.
@ -785,6 +859,17 @@ class Document(BaseModel, TimestampMixin):
index=True,
)
# Processing status for real-time visibility (JSONB)
# Format: {"state": "ready"} or {"state": "processing"} or {"state": "failed", "reason": "..."}
# Default to {"state": "ready"} for backward compatibility with existing documents
status = Column(
JSONB,
nullable=False,
default=DocumentStatus.ready,
server_default=text("'{\"state\": \"ready\"}'::jsonb"),
index=True,
)
# Relationships
search_space = relationship("SearchSpace", back_populates="documents")
created_by = relationship("User", back_populates="documents")

View file

@ -19,6 +19,7 @@ from app.db import (
from app.schemas import (
DocumentRead,
DocumentsCreate,
DocumentStatusSchema,
DocumentTitleRead,
DocumentTitleSearchResponse,
DocumentUpdate,
@ -271,6 +272,14 @@ async def read_documents(
if doc.created_by:
created_by_name = doc.created_by.display_name or doc.created_by.email
# Parse status from JSONB
status_data = None
if hasattr(doc, 'status') and doc.status:
status_data = DocumentStatusSchema(
state=doc.status.get("state", "ready"),
reason=doc.status.get("reason"),
)
api_documents.append(
DocumentRead(
id=doc.id,
@ -285,6 +294,7 @@ async def read_documents(
search_space_id=doc.search_space_id,
created_by_id=doc.created_by_id,
created_by_name=created_by_name,
status=status_data,
)
)
@ -417,6 +427,14 @@ async def search_documents(
if doc.created_by:
created_by_name = doc.created_by.display_name or doc.created_by.email
# Parse status from JSONB
status_data = None
if hasattr(doc, 'status') and doc.status:
status_data = DocumentStatusSchema(
state=doc.status.get("state", "ready"),
reason=doc.status.get("reason"),
)
api_documents.append(
DocumentRead(
id=doc.id,
@ -431,6 +449,7 @@ async def search_documents(
search_space_id=doc.search_space_id,
created_by_id=doc.created_by_id,
created_by_name=created_by_name,
status=status_data,
)
)
@ -806,6 +825,7 @@ async def delete_document(
"""
Delete a document.
Requires DOCUMENTS_DELETE permission for the search space.
Documents in "processing" state cannot be deleted.
"""
try:
result = await session.execute(
@ -818,6 +838,14 @@ async def delete_document(
status_code=404, detail=f"Document with id {document_id} not found"
)
# Check if document is pending or currently being processed
doc_state = document.status.get("state") if document.status else None
if doc_state in ("pending", "processing"):
raise HTTPException(
status_code=409, # Conflict
detail="Cannot delete document while it is pending or being processed. Please wait for processing to complete.",
)
# Check permission for the search space
await check_permission(
session,

View file

@ -4,6 +4,7 @@ from .documents import (
DocumentBase,
DocumentRead,
DocumentsCreate,
DocumentStatusSchema,
DocumentTitleRead,
DocumentTitleSearchResponse,
DocumentUpdate,
@ -87,6 +88,7 @@ __all__ = [
# Document schemas
"DocumentBase",
"DocumentRead",
"DocumentStatusSchema",
"DocumentTitleRead",
"DocumentTitleSearchResponse",
"DocumentUpdate",

View file

@ -41,6 +41,12 @@ class DocumentUpdate(DocumentBase):
pass
class DocumentStatusSchema(BaseModel):
    """Document processing status (mirrors the ``status`` JSONB column)."""

    state: str  # "ready", "pending", "processing", "failed"
    reason: str | None = None  # Failure reason; only set when state == "failed"
class DocumentRead(BaseModel):
id: int
title: str
@ -54,6 +60,7 @@ class DocumentRead(BaseModel):
search_space_id: int
created_by_id: UUID | None = None # User who created/uploaded this document
created_by_name: str | None = None # Display name or email of the user who created this document
status: DocumentStatusSchema | None = None # Processing status (ready, processing, failed)
model_config = ConfigDict(from_attributes=True)

View file

@ -28,6 +28,34 @@ def get_current_timestamp() -> datetime:
return datetime.now(UTC)
def safe_set_chunks(document: Document, chunks: list) -> None:
    """
    Safely assign chunks to a document without triggering lazy loading.

    ALWAYS use this instead of ``document.chunks = chunks`` to avoid
    SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).

    Why this is needed:
    - Direct assignment ``document.chunks = chunks`` makes SQLAlchemy load
      the OLD chunks first (for comparison/orphan detection)
    - That lazy load fails in an async context with the asyncpg driver
    - ``set_committed_value`` bypasses it by setting the value directly

    This function is safe regardless of how the document was loaded
    (with or without selectinload).

    Args:
        document: The Document object to update
        chunks: List of Chunk objects to assign

    Example:
        # Instead of: document.chunks = chunks (DANGEROUS!)
        safe_set_chunks(document, chunks)  # Always safe
    """
    # Local import keeps this ORM-internal helper out of the module surface.
    from sqlalchemy.orm.attributes import set_committed_value

    set_committed_value(document, 'chunks', chunks)
async def check_duplicate_document_by_hash(
session: AsyncSession, content_hash: str
) -> Document | None:

View file

@ -1,7 +1,7 @@
"use client";
import { formatDistanceToNow } from "date-fns";
import { Calendar, ChevronDown, ChevronUp, FileText, FileX, Loader2, Network, Plus, User } from "lucide-react";
import { AlertCircle, Calendar, CheckCircle2, ChevronDown, ChevronUp, Clock, FileText, FileX, Loader2, Network, Plus, User } from "lucide-react";
import { motion } from "motion/react";
import { useTranslations } from "next-intl";
import React, { useRef, useState, useEffect, useCallback } from "react";
@ -17,6 +17,7 @@ import {
DialogTitle,
} from "@/components/ui/dialog";
import { Skeleton } from "@/components/ui/skeleton";
import { Spinner } from "@/components/ui/spinner";
import {
Table,
TableBody,
@ -29,7 +30,61 @@ import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { DocumentTypeChip } from "./DocumentTypeIcon";
import { RowActions } from "./RowActions";
import type { ColumnVisibility, Document } from "./types";
import type { ColumnVisibility, Document, DocumentStatus } from "./types";
// Status indicator component for document processing status
// Status indicator for document processing status.
// Maps the JSONB-backed status to an icon with an explanatory tooltip.
// A missing status is treated as "ready" for backward compatibility with
// documents created before the status column existed.
function StatusIndicator({ status }: { status?: DocumentStatus }) {
	const state = status?.state ?? "ready";
	switch (state) {
		case "pending":
			return (
				<Tooltip>
					<TooltipTrigger asChild>
						<div className="flex items-center justify-center">
							<Clock className="h-5 w-5 text-muted-foreground" />
						</div>
					</TooltipTrigger>
					<TooltipContent side="top">Pending - waiting to be processed</TooltipContent>
				</Tooltip>
			);
		case "processing":
			return (
				<Tooltip>
					<TooltipTrigger asChild>
						<div className="flex items-center justify-center">
							<Spinner size="sm" className="text-primary" />
						</div>
					</TooltipTrigger>
					<TooltipContent side="top">Processing...</TooltipContent>
				</Tooltip>
			);
		case "failed":
			return (
				<Tooltip>
					<TooltipTrigger asChild>
						<div className="flex items-center justify-center">
							<AlertCircle className="h-5 w-5 text-destructive" />
						</div>
					</TooltipTrigger>
					<TooltipContent side="top" className="max-w-xs">
						{status?.reason || "Processing failed"}
					</TooltipContent>
				</Tooltip>
			);
		case "ready":
			return (
				<Tooltip>
					<TooltipTrigger asChild>
						<div className="flex items-center justify-center">
							<CheckCircle2 className="h-5 w-5 text-muted-foreground/60" />
						</div>
					</TooltipTrigger>
					<TooltipContent side="top">Ready</TooltipContent>
				</Tooltip>
			);
		default:
			// The status JSONB comes from the backend at runtime, so an
			// unrecognized state is possible despite the TS union type.
			// Render nothing instead of returning undefined from the switch.
			return null;
	}
}
export type SortKey = keyof Pick<Document, "title" | "document_type" | "created_at">;
@ -460,7 +515,7 @@ export function DocumentsTableShell({
</TableHead>
)}
{columnVisibility.created_at && (
<TableHead className="w-32">
<TableHead className="w-32 border-r border-border/40">
<SortableHeader
sortKey="created_at"
currentSortKey={sortKey}
@ -472,6 +527,13 @@ export function DocumentsTableShell({
</SortableHeader>
</TableHead>
)}
{columnVisibility.status && (
<TableHead className="w-20 text-center">
<span className="text-sm font-medium text-muted-foreground/70">
Status
</span>
</TableHead>
)}
<TableHead className="w-10">
<span className="sr-only">Actions</span>
</TableHead>
@ -552,7 +614,7 @@ export function DocumentsTableShell({
</TableCell>
)}
{columnVisibility.created_at && (
<TableCell className="w-32 py-2.5 text-sm text-foreground">
<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40">
<Tooltip>
<TooltipTrigger asChild>
<span className="cursor-default">{formatRelativeDate(doc.created_at)}</span>
@ -563,6 +625,11 @@ export function DocumentsTableShell({
</Tooltip>
</TableCell>
)}
{columnVisibility.status && (
<TableCell className="w-20 py-2.5 text-center">
<StatusIndicator status={doc.status} />
</TableCell>
)}
<TableCell className="w-10 py-2.5 text-center">
<RowActions
document={doc}
@ -647,11 +714,14 @@ export function DocumentsTableShell({
)}
</div>
</div>
<RowActions
document={doc}
deleteDocument={deleteDocument}
searchSpaceId={searchSpaceId}
/>
<div className="flex items-center gap-2">
{columnVisibility.status && <StatusIndicator status={doc.status} />}
<RowActions
document={doc}
deleteDocument={deleteDocument}
searchSpaceId={searchSpaceId}
/>
</div>
</div>
</motion.div>
);

View file

@ -45,10 +45,17 @@ export function RowActions({
document.document_type as (typeof EDITABLE_DOCUMENT_TYPES)[number]
);
const isDeletable = !NON_DELETABLE_DOCUMENT_TYPES.includes(
// Documents in "pending" or "processing" state should show disabled delete
const isBeingProcessed = document.status?.state === "pending" || document.status?.state === "processing";
// SURFSENSE_DOCS are system-managed and should not show delete at all
const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes(
document.document_type as (typeof NON_DELETABLE_DOCUMENT_TYPES)[number]
);
// Delete is disabled while processing
const isDeleteDisabled = isBeingProcessed;
const handleDelete = async () => {
setIsDeleting(true);
try {
@ -87,10 +94,11 @@ export function RowActions({
<Pencil className="mr-2 h-4 w-4" />
<span>Edit</span>
</DropdownMenuItem>
{isDeletable && (
{shouldShowDelete && (
<DropdownMenuItem
onClick={() => setIsDeleteOpen(true)}
className="text-destructive focus:text-destructive"
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
disabled={isDeleteDisabled}
className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"}
>
<Trash2 className="mr-2 h-4 w-4" />
<span>Delete</span>
@ -100,13 +108,13 @@ export function RowActions({
</DropdownMenu>
) : (
// Non-editable documents: show only delete button directly
isDeletable && (
shouldShowDelete && (
<Button
variant="ghost"
size="icon"
className="h-8 w-8 text-muted-foreground hover:text-destructive hover:bg-destructive/10"
onClick={() => setIsDeleteOpen(true)}
disabled={isDeleting}
className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground/50 cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
disabled={isDeleting || isDeleteDisabled}
>
<Trash2 className="h-4 w-4" />
<span className="sr-only">Delete</span>
@ -131,10 +139,11 @@ export function RowActions({
<Pencil className="mr-2 h-4 w-4" />
<span>Edit</span>
</DropdownMenuItem>
{isDeletable && (
{shouldShowDelete && (
<DropdownMenuItem
onClick={() => setIsDeleteOpen(true)}
className="text-destructive focus:text-destructive"
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
disabled={isDeleteDisabled}
className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"}
>
<Trash2 className="mr-2 h-4 w-4" />
<span>Delete</span>
@ -144,13 +153,13 @@ export function RowActions({
</DropdownMenu>
) : (
// Non-editable documents: show only delete button directly
isDeletable && (
shouldShowDelete && (
<Button
variant="ghost"
size="icon"
className="h-8 w-8 text-muted-foreground hover:text-destructive hover:bg-destructive/10"
onClick={() => setIsDeleteOpen(true)}
disabled={isDeleting}
className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground/50 cursor-not-allowed" : "text-muted-foreground hover:text-destructive hover:bg-destructive/10"}`}
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
disabled={isDeleting || isDeleteDisabled}
>
<Trash2 className="h-4 w-4" />
<span className="sr-only">Delete</span>

View file

@ -1,5 +1,10 @@
export type DocumentType = string;
// Processing status of a document (mirrors the backend DocumentStatus JSONB).
export type DocumentStatus = {
	state: "ready" | "pending" | "processing" | "failed";
	reason?: string; // Only present when state === "failed"
};
export type Document = {
id: number;
title: string;
@ -11,10 +16,12 @@ export type Document = {
search_space_id: number;
created_by_id?: string | null;
created_by_name?: string | null;
status?: DocumentStatus;
};
// Per-column visibility toggles for the documents table.
export type ColumnVisibility = {
	document_type: boolean;
	created_by: boolean;
	created_at: boolean;
	status: boolean; // Processing-status indicator column
};

View file

@ -38,6 +38,7 @@ export default function DocumentsTable() {
document_type: true,
created_by: true,
created_at: true,
status: true,
});
const [pageIndex, setPageIndex] = useState(0);
const [sortKey, setSortKey] = useState<SortKey>("created_at");
@ -115,6 +116,7 @@ export default function DocumentsTable() {
created_by_id: item.created_by_id ?? null,
created_by_name: item.created_by_name ?? null,
created_at: item.created_at,
status: (item as { status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string } }).status ?? { state: "ready" as const },
}))
: paginatedRealtimeDocuments;
@ -159,10 +161,35 @@ export default function DocumentsTable() {
toast.error(t("no_rows_selected"));
return;
}
// Filter out pending/processing documents - they cannot be deleted
// For real-time mode, use sortedRealtimeDocuments (which has status)
// For search mode, use searchResponse items (need to safely access status)
const allDocs = isSearchMode
? (searchResponse?.items || []).map(item => ({
id: item.id,
status: (item as { status?: { state: string } }).status,
}))
: sortedRealtimeDocuments.map(doc => ({ id: doc.id, status: doc.status }));
const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id));
const deletableIds = selectedDocs
.filter((doc) => doc.status?.state !== "pending" && doc.status?.state !== "processing")
.map((doc) => doc.id);
const inProgressCount = selectedIds.size - deletableIds.length;
if (inProgressCount > 0) {
toast.warning(`${inProgressCount} document(s) are pending or processing and cannot be deleted.`);
}
if (deletableIds.length === 0) {
return;
}
try {
// Delete documents one by one using the mutation
const results = await Promise.all(
Array.from(selectedIds).map(async (id) => {
deletableIds.map(async (id) => {
try {
await deleteDocumentMutation({ id });
return true;
@ -172,7 +199,7 @@ export default function DocumentsTable() {
})
);
const okCount = results.filter((r) => r === true).length;
if (okCount === selectedIds.size)
if (okCount === deletableIds.length)
toast.success(t("delete_success_count", { count: okCount }));
else toast.error(t("delete_partial_failed"));

View file

@ -9,6 +9,12 @@ import { useElectricClient } from "@/lib/electric/context";
// Stable empty array to prevent infinite re-renders when no typeFilter is provided
const EMPTY_TYPE_FILTER: DocumentTypeEnum[] = [];
// Document status type (matches the backend DocumentStatus JSONB shape)
export interface DocumentStatusType {
	state: "ready" | "pending" | "processing" | "failed";
	reason?: string; // Only set when state is "failed"
}
// Document from Electric sync (lightweight table columns - NO content/metadata)
interface DocumentElectric {
id: number;
@ -17,6 +23,7 @@ interface DocumentElectric {
title: string;
created_by_id: string | null;
created_at: string;
status: DocumentStatusType | null;
}
// Document for display (with resolved user name)
@ -28,6 +35,7 @@ export interface DocumentDisplay {
created_by_id: string | null;
created_by_name: string | null;
created_at: string;
status: DocumentStatusType;
}
/**
@ -117,6 +125,7 @@ export function useDocuments(
created_by_id?: string | null;
created_by_name?: string | null;
created_at: string;
status?: DocumentStatusType | null;
}): DocumentDisplay => ({
id: item.id,
search_space_id: item.search_space_id,
@ -125,6 +134,7 @@ export function useDocuments(
created_by_id: item.created_by_id ?? null,
created_by_name: item.created_by_name ?? null,
created_at: item.created_at,
status: item.status ?? { state: "ready" },
}),
[]
);
@ -136,6 +146,7 @@ export function useDocuments(
created_by_name: doc.created_by_id
? userCacheRef.current.get(doc.created_by_id) ?? null
: null,
status: doc.status ?? { state: "ready" },
}),
[]
);
@ -221,7 +232,7 @@ export function useDocuments(
const handle = await client.syncShape({
table: "documents",
where: `search_space_id = ${spaceId}`,
columns: ["id", "document_type", "search_space_id", "title", "created_by_id", "created_at"],
columns: ["id", "document_type", "search_space_id", "title", "created_by_id", "created_at", "status"],
primaryKey: ["id"],
});
@ -259,7 +270,7 @@ export function useDocuments(
return;
}
let query = `SELECT id, document_type, search_space_id, title, created_by_id, created_at
let query = `SELECT id, document_type, search_space_id, title, created_by_id, created_at, status
FROM documents
WHERE search_space_id = $1`;

View file

@ -72,7 +72,9 @@ const pendingSyncs = new Map<string, Promise<SyncHandle>>();
// - fixed getSyncCutoffDate to use stable midnight UTC timestamps
// v6: real-time documents table - added title and created_by_id columns for live document display
// v7: removed use-documents-electric.ts - consolidated to single documents sync to prevent conflicts
const SYNC_VERSION = 7;
// v8: added status column for real-time document processing status (ready/processing/failed)
// v9: added pending state for accurate document queue visibility
const SYNC_VERSION = 11;
// Database name prefix for identifying SurfSense databases
const DB_PREFIX = "surfsense-";
@ -245,12 +247,14 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
document_type TEXT NOT NULL,
title TEXT NOT NULL DEFAULT '',
created_by_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
status JSONB DEFAULT '{"state": "ready"}'::jsonb
);
CREATE INDEX IF NOT EXISTS idx_documents_search_space_id ON documents(search_space_id);
CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(document_type);
CREATE INDEX IF NOT EXISTS idx_documents_search_space_type ON documents(search_space_id, document_type);
CREATE INDEX IF NOT EXISTS idx_documents_status ON documents((status->>'state'));
`);
await db.exec(`