diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 226d511cc..52989d27f 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -18,6 +18,8 @@ from app.db import ( ) from app.schemas import ( DocumentRead, + DocumentStatusBatchResponse, + DocumentStatusItemRead, DocumentsCreate, DocumentStatusSchema, DocumentTitleRead, @@ -148,6 +150,7 @@ async def create_documents_file_upload( tuple[Document, str, str] ] = [] # (document, temp_path, filename) skipped_duplicates = 0 + duplicate_document_ids: list[int] = [] # ===== PHASE 1: Create pending documents for all files ===== # This makes ALL documents visible in the UI immediately with pending status @@ -182,6 +185,7 @@ async def create_documents_file_upload( # True duplicate — content already indexed, skip os.unlink(temp_path) skipped_duplicates += 1 + duplicate_document_ids.append(existing.id) continue # Existing document is stuck (failed/pending/processing) @@ -255,6 +259,7 @@ async def create_documents_file_upload( return { "message": "Files uploaded for processing", "document_ids": [doc.id for doc in created_documents], + "duplicate_document_ids": duplicate_document_ids, "total_files": len(files), "pending_files": len(files_to_process), "skipped_duplicates": skipped_duplicates, @@ -678,6 +683,74 @@ async def search_document_titles( ) from e +@router.get("/documents/status", response_model=DocumentStatusBatchResponse) +async def get_documents_status( + search_space_id: int, + document_ids: str, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Batch status endpoint for documents in a search space. + + Returns lightweight status info for the provided document IDs, intended for + polling async ETL progress in chat upload flows. + """ + try: + await check_permission( + session, + user, + search_space_id, + Permission.DOCUMENTS_READ.value, + "You don't have permission to read documents in this search space", + ) + + # Parse comma-separated IDs (e.g. "1,2,3") + parsed_ids = [] + for raw_id in document_ids.split(","): + value = raw_id.strip() + if not value: + continue + try: + parsed_ids.append(int(value)) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Invalid document id: {value}", + ) from None + + if not parsed_ids: + return DocumentStatusBatchResponse(items=[]) + + result = await session.execute( + select(Document).filter( + Document.search_space_id == search_space_id, + Document.id.in_(parsed_ids), + ) + ) + docs = result.scalars().all() + + items = [ + DocumentStatusItemRead( + id=doc.id, + title=doc.title, + document_type=doc.document_type, + status=DocumentStatusSchema( + state=(doc.status or {}).get("state", "ready"), + reason=(doc.status or {}).get("reason"), + ), + ) + for doc in docs + ] + return DocumentStatusBatchResponse(items=items) + except HTTPException: + raise + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed to fetch document status: {e!s}" + ) from e + + @router.get("/documents/type-counts") async def get_document_type_counts( search_space_id: int | None = None, diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 6d5268a8d..bfb5e109d 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -8,16 +8,11 @@ These endpoints support the ThreadHistoryAdapter pattern from assistant-ui: - PUT /threads/{thread_id} - Update thread (rename, archive) - DELETE /threads/{thread_id} - Delete thread - POST /threads/{thread_id}/messages - Append message -- POST /attachments/process - Process attachments for chat context """ -import contextlib -import os -import tempfile -import uuid from datetime import UTC, datetime -from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile +from fastapi import APIRouter, Depends, HTTPException, Request from fastapi.responses import StreamingResponse from sqlalchemy import func, or_ from sqlalchemy.exc import IntegrityError, OperationalError @@ -1047,7 +1042,6 @@ async def handle_new_chat( session=session, user_id=str(user.id), llm_config_id=llm_config_id, - attachments=request.attachments, mentioned_document_ids=request.mentioned_document_ids, mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, needs_history_bootstrap=thread.needs_history_bootstrap, @@ -1278,7 +1272,6 @@ async def regenerate_response( session=session, user_id=str(user.id), llm_config_id=llm_config_id, - attachments=request.attachments, mentioned_document_ids=request.mentioned_document_ids, mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, checkpoint_id=target_checkpoint_id, @@ -1334,184 +1327,3 @@ async def regenerate_response( detail=f"An unexpected error occurred during regeneration: {e!s}", ) from None - -# ============================================================================= -# Attachment Processing Endpoint -# ============================================================================= - - -@router.post("/attachments/process") -async def process_attachment( - file: UploadFile = File(...), - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Process an attachment file and extract its content as markdown. - - This endpoint uses the configured ETL service to parse files and return - the extracted content that can be used as context in chat messages. - - Supported file types depend on the configured ETL_SERVICE: - - Markdown/Text files: .md, .markdown, .txt (always supported) - - Audio files: .mp3, .mp4, .mpeg, .mpga, .m4a, .wav, .webm (if STT configured) - - Documents: .pdf, .docx, .doc, .pptx, .xlsx (depends on ETL service) - - Returns: - JSON with attachment id, name, type, and extracted content - """ - from app.config import config as app_config - - if not file.filename: - raise HTTPException(status_code=400, detail="No filename provided") - - filename = file.filename - attachment_id = str(uuid.uuid4()) - - try: - # Save file to a temporary location - file_ext = os.path.splitext(filename)[1].lower() - with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp_file: - temp_path = temp_file.name - content = await file.read() - temp_file.write(content) - - extracted_content = "" - - # Process based on file type - if file_ext in (".md", ".markdown", ".txt"): - # For text/markdown files, read content directly - with open(temp_path, encoding="utf-8") as f: - extracted_content = f.read() - - elif file_ext in (".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm"): - # Audio files - transcribe if STT service is configured - if not app_config.STT_SERVICE: - raise HTTPException( - status_code=422, - detail="Audio transcription is not configured. Please set STT_SERVICE.", - ) - - stt_service_type = ( - "local" if app_config.STT_SERVICE.startswith("local/") else "external" - ) - - if stt_service_type == "local": - from app.services.stt_service import stt_service - - result = stt_service.transcribe_file(temp_path) - extracted_content = result.get("text", "") - else: - from litellm import atranscription - - with open(temp_path, "rb") as audio_file: - transcription_kwargs = { - "model": app_config.STT_SERVICE, - "file": audio_file, - "api_key": app_config.STT_SERVICE_API_KEY, - } - if app_config.STT_SERVICE_API_BASE: - transcription_kwargs["api_base"] = ( - app_config.STT_SERVICE_API_BASE - ) - - transcription_response = await atranscription( - **transcription_kwargs - ) - extracted_content = transcription_response.get("text", "") - - if extracted_content: - extracted_content = ( - f"# Transcription of {filename}\n\n{extracted_content}" - ) - - else: - # Document files - use configured ETL service - if app_config.ETL_SERVICE == "UNSTRUCTURED": - from langchain_unstructured import UnstructuredLoader - - from app.utils.document_converters import convert_document_to_markdown - - loader = UnstructuredLoader( - temp_path, - mode="elements", - post_processors=[], - languages=["eng"], - include_orig_elements=False, - include_metadata=False, - strategy="auto", - ) - docs = await loader.aload() - extracted_content = await convert_document_to_markdown(docs) - - elif app_config.ETL_SERVICE == "LLAMACLOUD": - from llama_cloud_services import LlamaParse - from llama_cloud_services.parse.utils import ResultType - - parser = LlamaParse( - api_key=app_config.LLAMA_CLOUD_API_KEY, - num_workers=1, - verbose=False, - language="en", - result_type=ResultType.MD, - ) - result = await parser.aparse(temp_path) - markdown_documents = await result.aget_markdown_documents( - split_by_page=False - ) - - if markdown_documents: - extracted_content = "\n\n".join( - doc.text for doc in markdown_documents - ) - - elif app_config.ETL_SERVICE == "DOCLING": - from app.services.docling_service import create_docling_service - - docling_service = create_docling_service() - result = await docling_service.process_document(temp_path, filename) - extracted_content = result.get("content", "") - - else: - raise HTTPException( - status_code=422, - detail=f"ETL service not configured or unsupported file type: {file_ext}", - ) - - # Clean up temp file - with contextlib.suppress(Exception): - os.unlink(temp_path) - - if not extracted_content: - raise HTTPException( - status_code=422, - detail=f"Could not extract content from file: {filename}", - ) - - # Determine attachment type (must be one of: "image", "document", "file") - # assistant-ui only supports these three types - if file_ext in (".png", ".jpg", ".jpeg", ".gif", ".webp"): - attachment_type = "image" - else: - # All other files (including audio, documents, text) are treated as "document" - attachment_type = "document" - - return { - "id": attachment_id, - "name": filename, - "type": attachment_type, - "content": extracted_content, - "contentLength": len(extracted_content), - } - - except HTTPException: - raise - except Exception as e: - # Clean up temp file on error - with contextlib.suppress(Exception): - os.unlink(temp_path) - - raise HTTPException( - status_code=500, - detail=f"Failed to process attachment: {e!s}", - ) from e diff --git a/surfsense_backend/app/schemas/__init__.py b/surfsense_backend/app/schemas/__init__.py index c6d66149f..7c363e41f 100644 --- a/surfsense_backend/app/schemas/__init__.py +++ b/surfsense_backend/app/schemas/__init__.py @@ -10,6 +10,8 @@ from .chunks import ChunkBase, ChunkCreate, ChunkRead, ChunkUpdate from .documents import ( DocumentBase, DocumentRead, + DocumentStatusBatchResponse, + DocumentStatusItemRead, DocumentsCreate, DocumentStatusSchema, DocumentTitleRead, @@ -105,6 +107,8 @@ __all__ = [ # Document schemas "DocumentBase", "DocumentRead", + "DocumentStatusBatchResponse", + "DocumentStatusItemRead", "DocumentStatusSchema", "DocumentTitleRead", "DocumentTitleSearchResponse", diff --git a/surfsense_backend/app/schemas/documents.py b/surfsense_backend/app/schemas/documents.py index 4cedc7d93..2ca341921 100644 --- a/surfsense_backend/app/schemas/documents.py +++ b/surfsense_backend/app/schemas/documents.py @@ -99,3 +99,20 @@ class DocumentTitleSearchResponse(BaseModel): items: list[DocumentTitleRead] has_more: bool + + +class DocumentStatusItemRead(BaseModel): + """Lightweight document status payload for batch status polling.""" + + id: int + title: str + document_type: DocumentType + status: DocumentStatusSchema + + model_config = ConfigDict(from_attributes=True) + + +class DocumentStatusBatchResponse(BaseModel): + """Batch status response for a set of document IDs.""" + + items: list[DocumentStatusItemRead] diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 61af0d92c..aa95e49e6 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -159,15 +159,6 @@ class ChatMessage(BaseModel): content: str -class ChatAttachment(BaseModel): - """An attachment with its extracted content for chat context.""" - - id: str # Unique attachment ID - name: str # Original filename - type: str # Attachment type: document, image, audio - content: str # Extracted markdown content from the file - - class NewChatRequest(BaseModel): """Request schema for the deep agent chat endpoint.""" @@ -175,9 +166,6 @@ class NewChatRequest(BaseModel): user_query: str search_space_id: int messages: list[ChatMessage] | None = None # Optional chat history from frontend - attachments: list[ChatAttachment] | None = ( - None # Optional attachments with extracted content - ) mentioned_document_ids: list[int] | None = ( None # Optional document IDs mentioned with @ in the chat ) @@ -201,7 +189,6 @@ class RegenerateRequest(BaseModel): user_query: str | None = ( None # New user query (for edit). None = reload with same query ) - attachments: list[ChatAttachment] | None = None mentioned_document_ids: list[int] | None = None mentioned_surfsense_doc_ids: list[int] | None = None diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index af5a2b0df..a8560ecba 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -28,7 +28,6 @@ from app.agents.new_chat.llm_config import ( ) from app.db import ChatVisibility, Document, SurfsenseDocsDocument from app.prompts import TITLE_GENERATION_PROMPT_TEMPLATE -from app.schemas.new_chat import ChatAttachment from app.services.chat_session_state_service import ( clear_ai_responding, set_ai_responding, @@ -37,24 +36,6 @@ from app.services.connector_service import ConnectorService from app.services.new_streaming_service import VercelStreamingService from app.utils.content_utils import bootstrap_history_from_db - -def format_attachments_as_context(attachments: list[ChatAttachment]) -> str: - """Format attachments as context for the agent.""" - if not attachments: - return "" - - context_parts = [""] - for i, attachment in enumerate(attachments, 1): - context_parts.append( - f"" - ) - context_parts.append(f"") - context_parts.append("") - context_parts.append("") - - return "\n".join(context_parts) - - def format_mentioned_documents_as_context(documents: list[Document]) -> str: """ Format mentioned documents as context for the agent. @@ -203,7 +184,6 @@ async def stream_new_chat( session: AsyncSession, user_id: str | None = None, llm_config_id: int = -1, - attachments: list[ChatAttachment] | None = None, mentioned_document_ids: list[int] | None = None, mentioned_surfsense_doc_ids: list[int] | None = None, checkpoint_id: str | None = None, @@ -224,7 +204,6 @@ async def stream_new_chat( session: The database session user_id: The current user's UUID string (for memory tools and session state) llm_config_id: The LLM configuration ID (default: -1 for first global config) - attachments: Optional attachments with extracted content needs_history_bootstrap: If True, load message history from DB (for cloned chats) mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat @@ -360,13 +339,10 @@ async def stream_new_chat( ) mentioned_surfsense_docs = list(result.scalars().all()) - # Format the user query with context (attachments + mentioned documents + surfsense docs) + # Format the user query with context (mentioned documents + SurfSense docs) final_query = user_query context_parts = [] - if attachments: - context_parts.append(format_attachments_as_context(attachments)) - if mentioned_documents: context_parts.append( format_mentioned_documents_as_context(mentioned_documents) @@ -459,39 +435,20 @@ async def stream_new_chat( last_active_step_id = analyze_step_id # Determine step title and action verb based on context - if attachments and (mentioned_documents or mentioned_surfsense_docs): - last_active_step_title = "Analyzing your content" - action_verb = "Reading" - elif attachments: - last_active_step_title = "Reading your content" - action_verb = "Reading" - elif mentioned_documents or mentioned_surfsense_docs: + if mentioned_documents or mentioned_surfsense_docs: last_active_step_title = "Analyzing referenced content" action_verb = "Analyzing" else: last_active_step_title = "Understanding your request" action_verb = "Processing" - # Build the message with inline context about attachments/documents + # Build the message with inline context about referenced documents processing_parts = [] # Add the user query query_text = user_query[:80] + ("..." if len(user_query) > 80 else "") processing_parts.append(query_text) - # Add file attachment names inline - if attachments: - attachment_names = [] - for attachment in attachments: - name = attachment.name - if len(name) > 30: - name = name[:27] + "..." - attachment_names.append(name) - if len(attachment_names) == 1: - processing_parts.append(f"[{attachment_names[0]}]") - else: - processing_parts.append(f"[{len(attachment_names)} files]") - # Add mentioned document names inline if mentioned_documents: doc_names = [] diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 22085e064..bc5aca91e 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -44,7 +44,6 @@ import { useMessagesElectric } from "@/hooks/use-messages-electric"; import { documentsApiService } from "@/lib/apis/documents-api.service"; // import { WriteTodosToolUI } from "@/components/tool-ui/write-todos"; import { getBearerToken } from "@/lib/auth-utils"; -import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter"; import { convertToThreadMessage } from "@/lib/chat/message-utils"; import { isPodcastGenerating, @@ -216,9 +215,6 @@ export default function NewChatPage() { useMessagesElectric(threadId, handleElectricMessagesUpdate); - // Create the attachment adapter for file processing - const attachmentAdapter = useMemo(() => createAttachmentAdapter(), []); - // Extract search_space_id from URL params const searchSpaceId = useMemo(() => { const id = params.search_space_id; @@ -409,16 +405,7 @@ export default function NewChatPage() { } } - // Extract attachments from message - // AppendMessage.attachments contains the processed attachment objects (from adapter.send()) - const messageAttachments: Array> = []; - if (message.attachments && message.attachments.length > 0) { - for (const att of message.attachments) { - messageAttachments.push(att as unknown as Record); - } - } - - if (!userQuery.trim() && messageAttachments.length === 0) return; + if (!userQuery.trim()) return; // Check if podcast is already generating if (isPodcastGenerating() && looksLikePodcastRequest(userQuery)) { @@ -485,14 +472,13 @@ export default function NewChatPage() { role: "user", content: message.content, createdAt: new Date(), - attachments: message.attachments || [], metadata: authorMetadata, }; setMessages((prev) => [...prev, userMessage]); // Track message sent trackChatMessageSent(searchSpaceId, currentThreadId, { - hasAttachments: messageAttachments.length > 0, + hasAttachments: false, hasMentionedDocuments: mentionedDocumentIds.surfsense_doc_ids.length > 0 || mentionedDocumentIds.document_ids.length > 0, @@ -512,7 +498,7 @@ export default function NewChatPage() { })); } - // Persist user message with mentioned documents and attachments (don't await, fire and forget) + // Persist user message with mentioned documents (don't await, fire and forget) const persistContent: unknown[] = [...message.content]; // Add mentioned documents for persistence @@ -527,23 +513,6 @@ export default function NewChatPage() { }); } - // Add attachments for persistence (so they survive page reload) - if (message.attachments && message.attachments.length > 0) { - persistContent.push({ - type: "attachments", - items: message.attachments.map((att) => ({ - id: att.id, - name: att.name, - type: att.type, - contentType: (att as { contentType?: string }).contentType, - // Include imageDataUrl for images so they can be displayed after reload - imageDataUrl: (att as { imageDataUrl?: string }).imageDataUrl, - // Include extractedContent for context (already extracted, no re-processing needed) - extractedContent: (att as { extractedContent?: string }).extractedContent, - })), - }); - } - appendMessage(currentThreadId, { role: "user", content: persistContent, @@ -688,9 +657,6 @@ export default function NewChatPage() { }) .filter((m) => m.content.length > 0); - // Extract attachment content to send with the request - const attachments = extractAttachmentContent(messageAttachments); - // Get mentioned document IDs for context (separate fields for backend) const hasDocumentIds = mentionedDocumentIds.document_ids.length > 0; const hasSurfsenseDocIds = mentionedDocumentIds.surfsense_doc_ids.length > 0; @@ -715,7 +681,6 @@ export default function NewChatPage() { user_query: userQuery.trim(), search_space_id: searchSpaceId, messages: messageHistory, - attachments: attachments.length > 0 ? attachments : undefined, mentioned_document_ids: hasDocumentIds ? mentionedDocumentIds.document_ids : undefined, mentioned_surfsense_doc_ids: hasSurfsenseDocIds ? mentionedDocumentIds.surfsense_doc_ids @@ -1010,7 +975,6 @@ export default function NewChatPage() { // Extract the original user query BEFORE removing messages (for reload mode) let userQueryToDisplay = newUserQuery; let originalUserMessageContent: ThreadMessageLike["content"] | null = null; - let originalUserMessageAttachments: ThreadMessageLike["attachments"] | undefined; let originalUserMessageMetadata: ThreadMessageLike["metadata"] | undefined; if (!newUserQuery) { @@ -1018,7 +982,6 @@ export default function NewChatPage() { const lastUserMessage = [...messages].reverse().find((m) => m.role === "user"); if (lastUserMessage) { originalUserMessageContent = lastUserMessage.content; - originalUserMessageAttachments = lastUserMessage.attachments; originalUserMessageMetadata = lastUserMessage.metadata; // Extract text for the API request for (const part of lastUserMessage.content) { @@ -1144,7 +1107,6 @@ export default function NewChatPage() { ? [{ type: "text", text: newUserQuery }] : originalUserMessageContent || [{ type: "text", text: userQueryToDisplay || "" }], createdAt: new Date(), - attachments: newUserQuery ? undefined : originalUserMessageAttachments, metadata: newUserQuery ? undefined : originalUserMessageMetadata, }; setMessages((prev) => [...prev, userMessage]); @@ -1391,7 +1353,7 @@ export default function NewChatPage() { await handleRegenerate(null); }, [handleRegenerate]); - // Create external store runtime with attachment support + // Create external store runtime const runtime = useExternalStoreRuntime({ messages, isRunning, @@ -1400,9 +1362,6 @@ export default function NewChatPage() { onReload, convertMessage, onCancel: cancelRun, - adapters: { - attachments: attachmentAdapter, - }, }); // Show loading state only when loading an existing thread diff --git a/surfsense_web/components/assistant-ui/attachment.tsx b/surfsense_web/components/assistant-ui/attachment.tsx deleted file mode 100644 index 3a64d3a6c..000000000 --- a/surfsense_web/components/assistant-ui/attachment.tsx +++ /dev/null @@ -1,377 +0,0 @@ -"use client"; - -import { - AttachmentPrimitive, - ComposerPrimitive, - MessagePrimitive, - useAssistantApi, - useAssistantState, -} from "@assistant-ui/react"; -import { FileText, Paperclip, PlusIcon, Upload, XIcon } from "lucide-react"; -import Image from "next/image"; -import { type FC, type PropsWithChildren, useEffect, useRef, useState } from "react"; -import { useShallow } from "zustand/shallow"; -import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; -import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; -import { Dialog, DialogContent, DialogTitle, DialogTrigger } from "@/components/ui/dialog"; -import { - DropdownMenu, - DropdownMenuContent, - DropdownMenuItem, - DropdownMenuTrigger, -} from "@/components/ui/dropdown-menu"; -import { Spinner } from "@/components/ui/spinner"; -import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; -import { cn } from "@/lib/utils"; -import { useDocumentUploadDialog } from "./document-upload-popup"; - -const useFileSrc = (file: File | undefined) => { - const [src, setSrc] = useState(undefined); - - useEffect(() => { - if (!file) { - setSrc(undefined); - return; - } - - const objectUrl = URL.createObjectURL(file); - setSrc(objectUrl); - - return () => { - URL.revokeObjectURL(objectUrl); - }; - }, [file]); - - return src; -}; - -const useAttachmentSrc = () => { - const { file, src } = useAssistantState( - useShallow(({ attachment }): { file?: File; src?: string } => { - if (!attachment || attachment.type !== "image") return {}; - - // First priority: use File object if available (for new uploads) - if (attachment.file) return { file: attachment.file }; - - // Second priority: use stored imageDataUrl (for persisted messages) - // This is stored in our custom ChatAttachment interface - const customAttachment = attachment as { imageDataUrl?: string }; - if (customAttachment.imageDataUrl) { - return { src: customAttachment.imageDataUrl }; - } - - // Third priority: try to extract from content array (standard assistant-ui format) - if (Array.isArray(attachment.content)) { - const contentSrc = attachment.content.filter((c) => c.type === "image")[0]?.image; - if (contentSrc) return { src: contentSrc }; - } - - return {}; - }) - ); - - return useFileSrc(file) ?? src; -}; - -type AttachmentPreviewProps = { - src: string; -}; - -const AttachmentPreview: FC = ({ src }) => { - const [isLoaded, setIsLoaded] = useState(false); - return ( - Image Preview setIsLoaded(true)} - priority={false} - /> - ); -}; - -const AttachmentPreviewDialog: FC = ({ children }) => { - const src = useAttachmentSrc(); - - if (!src) return children; - - return ( - - - {children} - - - Image Attachment Preview -
- -
-
-
- ); -}; - -const AttachmentThumb: FC = () => { - const isImage = useAssistantState(({ attachment }) => attachment?.type === "image"); - // Check if actively processing (running AND progress < 100) - // When progress is 100, processing is done but waiting for send() - const isProcessing = useAssistantState(({ attachment }) => { - const status = attachment?.status; - if (status?.type !== "running") return false; - // If progress is defined and equals 100, processing is complete - const progress = (status as { type: "running"; progress?: number }).progress; - return progress === undefined || progress < 100; - }); - const src = useAttachmentSrc(); - - // Show loading spinner only when actively processing (not when done and waiting for send) - if (isProcessing) { - return ( -
- -
- ); - } - - return ( - - - - - - - ); -}; - -const AttachmentUI: FC = () => { - const api = useAssistantApi(); - const isComposer = api.attachment.source === "composer"; - - const isImage = useAssistantState(({ attachment }) => attachment?.type === "image"); - // Check if actively processing (running AND progress < 100) - // When progress is 100, processing is done but waiting for send() - const isProcessing = useAssistantState(({ attachment }) => { - const status = attachment?.status; - if (status?.type !== "running") return false; - const progress = (status as { type: "running"; progress?: number }).progress; - return progress === undefined || progress < 100; - }); - const typeLabel = useAssistantState(({ attachment }) => { - const type = attachment?.type; - switch (type) { - case "image": - return "Image"; - case "document": - return "Document"; - case "file": - return "File"; - default: - return "File"; // Default fallback for unknown types - } - }); - - return ( - - #attachment-tile]:size-24" - )} - > - - - - - - {isComposer && !isProcessing && } - - - {isProcessing ? ( - - - Processing... - - ) : ( - - )} - - - ); -}; - -const AttachmentRemove: FC = () => { - return ( - - - - - - ); -}; - -/** - * Image attachment with preview thumbnail (click to expand) - */ -const MessageImageAttachment: FC = () => { - const attachmentName = useAssistantState(({ attachment }) => attachment?.name || "Image"); - const src = useAttachmentSrc(); - - if (!src) return null; - - return ( - -
- {attachmentName} - {/* Hover overlay with filename */} -
-
- - {attachmentName} - -
-
-
-
- ); -}; - -/** - * Document/file attachment as chip (similar to mentioned documents) - */ -const MessageDocumentAttachment: FC = () => { - const attachmentName = useAssistantState(({ attachment }) => attachment?.name || "Attachment"); - - return ( - - - - {attachmentName} - - - ); -}; - -/** - * Attachment component for user messages - * Shows image preview for images, chip for documents - */ -const MessageAttachmentChip: FC = () => { - const isImage = useAssistantState(({ attachment }) => attachment?.type === "image"); - - if (isImage) { - return ; - } - - return ; -}; - -export const UserMessageAttachments: FC = () => { - return ; -}; - -export const ComposerAttachments: FC = () => { - return ( -
- -
- ); -}; - -export const ComposerAddAttachment: FC = () => { - const chatAttachmentInputRef = useRef(null); - const { openDialog } = useDocumentUploadDialog(); - - const handleFileUpload = () => { - openDialog(); - }; - - const handleChatAttachment = () => { - chatAttachmentInputRef.current?.click(); - }; - - // Prevent event bubbling when file input is clicked - const handleFileInputClick = (e: React.MouseEvent) => { - e.stopPropagation(); - }; - - return ( - <> - - - - - - - - - - Add attachment to this chat - - - - Upload documents to Search Space - - - - - - - - ); -}; diff --git a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx index 437e5a7a5..b8194c91f 100644 --- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx +++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx @@ -27,6 +27,12 @@ export interface InlineMentionEditorRef { getText: () => string; getMentionedDocuments: () => MentionedDocument[]; insertDocumentChip: (doc: Pick) => void; + setDocumentChipStatus: ( + docId: number, + docType: string | undefined, + statusLabel: string | null, + statusKind?: "pending" | "processing" | "ready" | "failed" + ) => void; } interface InlineMentionEditorProps { @@ -46,6 +52,7 @@ interface InlineMentionEditorProps { const CHIP_DATA_ATTR = "data-mention-chip"; const CHIP_ID_ATTR = "data-mention-id"; const CHIP_DOCTYPE_ATTR = "data-mention-doctype"; +const CHIP_STATUS_ATTR = "data-mention-status"; /** * Type guard to check if a node is a chip element @@ -182,6 +189,11 @@ export const InlineMentionEditor = forwardRef { + if (!editorRef.current) return; + + const chips = editorRef.current.querySelectorAll( + `span[${CHIP_DATA_ATTR}="true"]` + ); + for (const chip of chips) { + const chipId = getChipId(chip); + const chipType = getChipDocType(chip); + if (chipId !== docId) continue; + if ((docType ?? "UNKNOWN") !== chipType) continue; + + const statusEl = chip.querySelector(`span[${CHIP_STATUS_ATTR}="true"]`); + if (!statusEl) continue; + + if (!statusLabel) { + statusEl.textContent = ""; + statusEl.className = "text-[10px] font-semibold opacity-80 hidden"; + continue; + } + + const statusClass = + statusKind === "failed" + ? "text-destructive" + : statusKind === "processing" + ? "text-amber-700" + : statusKind === "ready" + ? "text-emerald-700" + : "text-amber-700"; + statusEl.textContent = statusLabel; + statusEl.className = `text-[10px] font-semibold opacity-80 ${statusClass}`; + } + }, + [] + ); + // Expose methods via ref useImperativeHandle(ref, () => ({ focus: () => editorRef.current?.focus(), @@ -339,6 +394,7 @@ export const InlineMentionEditor = forwardRef; header?: React.ReactNode; @@ -230,8 +246,11 @@ const Composer: FC = () => { const [mentionedDocuments, setMentionedDocuments] = useAtom(mentionedDocumentsAtom); const [showDocumentPopover, setShowDocumentPopover] = useState(false); const [mentionQuery, setMentionQuery] = useState(""); + const [uploadedMentionDocs, setUploadedMentionDocs] = useState>({}); + const [isUploadingDocs, setIsUploadingDocs] = useState(false); const editorRef = useRef(null); const editorContainerRef = useRef(null); + const uploadInputRef = useRef(null); const documentPickerRef = useRef(null); const { search_space_id, chat_id } = useParams(); const setMentionedDocumentIds = useSetAtom(mentionedDocumentIdsAtom); @@ -357,9 +376,28 @@ const Composer: FC = () => { [showDocumentPopover] ); + const uploadedMentionedDocs = useMemo( + () => mentionedDocuments.filter((doc) => uploadedMentionDocs[doc.id]), + [mentionedDocuments, uploadedMentionDocs] + ); + + const blockingUploadedMentions = useMemo( + () => + uploadedMentionedDocs.filter((doc) => { + const state = uploadedMentionDocs[doc.id]?.state; + return state === "pending" || state === "processing" || state === "failed"; + }), + [uploadedMentionedDocs, uploadedMentionDocs] + ); + // Submit message (blocked during streaming, document picker open, or AI responding to another user) const handleSubmit = useCallback(() => { - if (isThreadRunning || isBlockedByOtherUser) { + if ( + isThreadRunning || + isBlockedByOtherUser || + isUploadingDocs || + blockingUploadedMentions.length > 0 + ) { return; } if (!showDocumentPopover) { @@ -375,6 +413,8 @@ const Composer: FC = () => { showDocumentPopover, isThreadRunning, isBlockedByOtherUser, + isUploadingDocs, + blockingUploadedMentions.length, composerRuntime, setMentionedDocuments, setMentionedDocumentIds, @@ -395,6 +435,11 @@ const Composer: FC = () => { }); return updated; }); + setUploadedMentionDocs((prev) => { + if (!(docId in prev)) return prev; + const { [docId]: _removed, ...rest } = prev; + return rest; + }); }, [setMentionedDocuments, setMentionedDocumentIds] ); @@ -433,6 +478,139 @@ const Composer: FC = () => { [mentionedDocuments, setMentionedDocuments, setMentionedDocumentIds] ); + const refreshUploadedDocStatuses = useCallback( + async (documentIds: number[]) => { + if (!search_space_id || documentIds.length === 0) return; + const statusResponse = await documentsApiService.getDocumentsStatus({ + queryParams: { + search_space_id: Number(search_space_id), + document_ids: documentIds, + }, + }); + + setUploadedMentionDocs((prev) => { + const next = { ...prev }; + for (const item of statusResponse.items) { + next[item.id] = { + id: item.id, + title: item.title, + document_type: item.document_type, + state: item.status.state, + reason: item.status.reason, + }; + } + return next; + }); + + handleDocumentsMention( + statusResponse.items.map((item) => ({ + id: item.id, + title: item.title, + document_type: item.document_type, + })) + ); + }, + [search_space_id, handleDocumentsMention] + ); + + const handleUploadClick = useCallback(() => { + uploadInputRef.current?.click(); + }, []); + + const handleUploadInputChange = useCallback( + async (event: React.ChangeEvent) => { + const files = Array.from(event.target.files ?? []); + event.target.value = ""; + if (files.length === 0 || !search_space_id) return; + + setIsUploadingDocs(true); + try { + const uploadResponse = await documentsApiService.uploadDocument({ + files, + search_space_id: Number(search_space_id), + }); + const uploadedIds = uploadResponse.document_ids ?? []; + const duplicateIds = uploadResponse.duplicate_document_ids ?? []; + const idsToMention = Array.from(new Set([...uploadedIds, ...duplicateIds])); + if (idsToMention.length === 0) { + toast.warning("No documents were created or matched from selected files."); + return; + } + + await refreshUploadedDocStatuses(idsToMention); + if (uploadedIds.length > 0 && duplicateIds.length > 0) { + toast.success( + `Uploaded ${uploadedIds.length} file${uploadedIds.length > 1 ? "s" : ""} and matched ${duplicateIds.length} existing file${duplicateIds.length > 1 ? "s" : ""}.` + ); + } else if (uploadedIds.length > 0) { + toast.success(`Uploaded ${uploadedIds.length} file${uploadedIds.length > 1 ? "s" : ""}`); + } else { + toast.success( + `Matched ${duplicateIds.length} existing file${duplicateIds.length > 1 ? "s" : ""} and added mention${duplicateIds.length > 1 ? "s" : ""}.` + ); + } + } catch (error) { + const message = error instanceof Error ? error.message : "Upload failed"; + toast.error(`Upload failed: ${message}`); + } finally { + setIsUploadingDocs(false); + } + }, + [search_space_id, refreshUploadedDocStatuses] + ); + + // Poll status for uploaded mentioned documents until all are ready or removed. + useEffect(() => { + const trackedIds = uploadedMentionedDocs.map((doc) => doc.id); + const needsPolling = trackedIds.some((id) => { + const state = uploadedMentionDocs[id]?.state; + return state === "pending" || state === "processing"; + }); + if (!needsPolling) return; + + const interval = setInterval(() => { + refreshUploadedDocStatuses(trackedIds).catch((error) => { + console.error("[Composer] Failed to refresh uploaded mention statuses:", error); + }); + }, 2500); + + return () => clearInterval(interval); + }, [uploadedMentionedDocs, uploadedMentionDocs, refreshUploadedDocStatuses]); + + // Push upload status directly onto mention chips (instead of separate status rows). + useEffect(() => { + for (const doc of uploadedMentionedDocs) { + const state = uploadedMentionDocs[doc.id]?.state ?? "pending"; + const statusLabel = + state === "ready" + ? null + : state === "failed" + ? "failed" + : state === "processing" + ? "indexing" + : "queued"; + editorRef.current?.setDocumentChipStatus(doc.id, doc.document_type, statusLabel, state); + } + }, [uploadedMentionedDocs, uploadedMentionDocs]); + + // Prune upload status entries that are no longer mentioned in the composer. + useEffect(() => { + const activeIds = new Set(mentionedDocuments.map((doc) => doc.id)); + setUploadedMentionDocs((prev) => { + let changed = false; + const next: Record = {}; + for (const [key, value] of Object.entries(prev)) { + const id = Number(key); + if (activeIds.has(id)) { + next[id] = value; + } else { + changed = true; + } + } + return changed ? next : prev; + }); + }, [mentionedDocuments]); + return ( { currentUserId={currentUser?.id ?? null} members={members ?? []} /> - - +
{/* Inline editor with @mention support */}
{ className="min-h-[24px]" />
+ {/* Document picker popover (portal to body for proper z-index stacking) */} {showDocumentPopover && @@ -483,33 +668,43 @@ const Composer: FC = () => { />, document.body )} - - + uploadedMentionDocs[doc.id]?.state === "failed" + )} + /> +
); }; interface ComposerActionProps { isBlockedByOtherUser?: boolean; + onUploadClick: () => void; + isUploadingDocs: boolean; + blockingUploadedMentionsCount: number; + hasFailedUploadedMentions: boolean; } -const ComposerAction: FC = ({ isBlockedByOtherUser = false }) => { - // Check if any attachments are still being processed (running AND progress < 100) - // When progress is 100, processing is done but waiting for send() - const hasProcessingAttachments = useAssistantState(({ composer }) => - composer.attachments?.some((att) => { - const status = att.status; - if (status?.type !== "running") return false; - const progress = (status as { type: "running"; progress?: number }).progress; - return progress === undefined || progress < 100; - }) - ); +const ComposerAction: FC = ({ + isBlockedByOtherUser = false, + onUploadClick, + isUploadingDocs, + blockingUploadedMentionsCount, + hasFailedUploadedMentions, +}) => { + const mentionedDocuments = useAtomValue(mentionedDocumentsAtom); - // Check if composer text is empty - const isComposerEmpty = useAssistantState(({ composer }) => { + // Check if composer text is empty (chips are represented in mentionedDocuments atom) + const isComposerTextEmpty = useAssistantState(({ composer }) => { const text = composer.text?.trim() || ""; return text.length === 0; }); + const isComposerEmpty = isComposerTextEmpty && mentionedDocuments.length === 0; // Check if a model is configured const { data: userConfigs } = useAtomValue(newLLMConfigsAtom); @@ -530,25 +725,51 @@ const ComposerAction: FC = ({ isBlockedByOtherUser = false }, [preferences, globalConfigs, userConfigs]); const isSendDisabled = - hasProcessingAttachments || isComposerEmpty || !hasModelConfigured || isBlockedByOtherUser; + isComposerEmpty || + !hasModelConfigured || + isBlockedByOtherUser || + isUploadingDocs || + blockingUploadedMentionsCount > 0; return (
- + + {isUploadingDocs ? ( + + ) : ( + + )} +
- {/* Show processing indicator when attachments are being processed */} - {hasProcessingAttachments && ( + {blockingUploadedMentionsCount > 0 && (
- - Processing... + {hasFailedUploadedMentions ? ( + + ) : ( + + )} + + {hasFailedUploadedMentions + ? "Remove or retry failed uploads" + : "Waiting for uploaded files to finish indexing"} +
)} {/* Show warning when no model is configured */} - {!hasModelConfigured && !hasProcessingAttachments && ( + {!hasModelConfigured && blockingUploadedMentionsCount === 0 && (
Select a model @@ -561,11 +782,15 @@ const ComposerAction: FC = ({ isBlockedByOtherUser = false tooltip={ isBlockedByOtherUser ? "Wait for AI to finish responding" + : hasFailedUploadedMentions + ? "Remove or retry failed uploads before sending" + : blockingUploadedMentionsCount > 0 + ? "Waiting for uploaded files to finish indexing" + : isUploadingDocs + ? "Uploading documents..." : !hasModelConfigured ? "Please select a model from the header to start chatting" - : hasProcessingAttachments - ? "Wait for attachments to process" - : isComposerEmpty + : isComposerEmpty ? "Enter a message to send" : "Send message" } diff --git a/surfsense_web/components/assistant-ui/user-message.tsx b/surfsense_web/components/assistant-ui/user-message.tsx index e70806d44..7ba5b9462 100644 --- a/surfsense_web/components/assistant-ui/user-message.tsx +++ b/surfsense_web/components/assistant-ui/user-message.tsx @@ -3,7 +3,6 @@ import { useAtomValue } from "jotai"; import { FileText, PencilIcon } from "lucide-react"; import { type FC, useState } from "react"; import { messageDocumentsMapAtom } from "@/atoms/chat/mentioned-documents.atom"; -import { UserMessageAttachments } from "@/components/assistant-ui/attachment"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; interface AuthorMetadata { @@ -48,9 +47,6 @@ export const UserMessage: FC = () => { const mentionedDocs = messageId ? messageDocumentsMap[messageId] : undefined; const metadata = useAssistantState(({ message }) => message?.metadata); const author = metadata?.custom?.author as AuthorMetadata | undefined; - const hasAttachments = useAssistantState( - ({ message }) => message?.attachments && message.attachments.length > 0 - ); return ( { >
- {/* Display attachments and mentioned documents */} - {(hasAttachments || (mentionedDocs && mentionedDocs.length > 0)) && ( + {/* Display mentioned documents */} + {mentionedDocs && mentionedDocs.length > 0 && (
- {/* Attachments (images show as thumbnails, documents as chips) */} - {/* Mentioned documents as chips */} {mentionedDocs?.map((doc) => ( ; export type CreateDocumentResponse = z.infer; export type UploadDocumentRequest = z.infer; export type UploadDocumentResponse = z.infer; +export type GetDocumentsStatusRequest = z.infer; +export type GetDocumentsStatusResponse = z.infer; +export type DocumentStatus = z.infer; +export type DocumentStatusItem = z.infer; export type SearchDocumentsRequest = z.infer; export type SearchDocumentsResponse = z.infer; export type SearchDocumentTitlesRequest = z.infer; diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index 03d86a253..c21d3efd2 100644 --- a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -8,6 +8,7 @@ import { type GetDocumentByChunkRequest, type GetDocumentRequest, type GetDocumentsRequest, + type GetDocumentsStatusRequest, type GetDocumentTypeCountsRequest, type GetSurfsenseDocsRequest, getDocumentByChunkRequest, @@ -15,6 +16,8 @@ import { getDocumentRequest, getDocumentResponse, getDocumentsRequest, + getDocumentsStatusRequest, + getDocumentsStatusResponse, getDocumentsResponse, getDocumentTypeCountsRequest, getDocumentTypeCountsResponse, @@ -130,6 +133,27 @@ class DocumentsApiService { }); }; + /** + * Batch document status for async processing tracking + */ + getDocumentsStatus = async (request: GetDocumentsStatusRequest) => { + const parsedRequest = getDocumentsStatusRequest.safeParse(request); + + if (!parsedRequest.success) { + console.error("Invalid request:", parsedRequest.error); + const errorMessage = parsedRequest.error.issues.map((issue) => issue.message).join(", "); + throw new ValidationError(`Invalid request: ${errorMessage}`); + } + + const { search_space_id, document_ids } = parsedRequest.data.queryParams; + const params = new URLSearchParams({ + search_space_id: String(search_space_id), + document_ids: document_ids.join(","), + }); + + return baseApiService.get(`/api/v1/documents/status?${params.toString()}`, getDocumentsStatusResponse); + }; + /** * Search documents by title */ diff --git a/surfsense_web/lib/chat/attachment-adapter.ts b/surfsense_web/lib/chat/attachment-adapter.ts deleted file mode 100644 index f084af411..000000000 --- a/surfsense_web/lib/chat/attachment-adapter.ts +++ /dev/null @@ -1,324 +0,0 @@ -/** - * Attachment adapter for assistant-ui - * - * This adapter handles file uploads by: - * 1. Uploading the file to the backend /attachments/process endpoint - * 2. The backend extracts markdown content using the configured ETL service - * 3. The extracted content is stored in the attachment and sent with messages - */ - -import type { AttachmentAdapter, CompleteAttachment, PendingAttachment } from "@assistant-ui/react"; -import { getBearerToken } from "@/lib/auth-utils"; - -/** - * Supported file types for the attachment adapter - * - * - Text/Markdown: .md, .markdown, .txt - * - Audio (if STT configured): .mp3, .mp4, .mpeg, .mpga, .m4a, .wav, .webm - * - Documents (depends on ETL service): .pdf, .docx, .doc, .pptx, .xlsx, .html - * - Images: .jpg, .jpeg, .png, .gif, .webp - */ -const ACCEPTED_FILE_TYPES = [ - // Text/Markdown (always supported) - ".md", - ".markdown", - ".txt", - // Audio files - ".mp3", - ".mp4", - ".mpeg", - ".mpga", - ".m4a", - ".wav", - ".webm", - // Document files (depends on ETL service) - ".pdf", - ".docx", - ".doc", - ".pptx", - ".xlsx", - ".html", - // Image files - ".jpg", - ".jpeg", - ".png", - ".gif", - ".webp", -].join(","); - -/** - * Response from the attachment processing endpoint - */ -interface ProcessAttachmentResponse { - id: string; - name: string; - type: "document" | "image" | "file"; - content: string; - contentLength: number; -} - -/** - * Extended CompleteAttachment with our custom extractedContent field - * We store the extracted text in a custom field so we can access it in onNew - * For images, we also store the data URL so it can be displayed after persistence - */ -export interface ChatAttachment extends CompleteAttachment { - extractedContent: string; - imageDataUrl?: string; // Base64 data URL for images (persists across page reloads) -} - -/** - * Process a file through the backend ETL service - */ -async function processAttachment(file: File): Promise { - const token = getBearerToken(); - if (!token) { - throw new Error("Not authenticated"); - } - - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; - - const formData = new FormData(); - formData.append("file", file); - - const response = await fetch(`${backendUrl}/api/v1/attachments/process`, { - method: "POST", - headers: { - Authorization: `Bearer ${token}`, - }, - body: formData, - }); - - if (!response.ok) { - const errorText = await response.text(); - console.error("[processAttachment] Error response:", errorText); - let errorDetail = "Unknown error"; - try { - const errorJson = JSON.parse(errorText); - // FastAPI validation errors return detail as array - if (Array.isArray(errorJson.detail)) { - errorDetail = errorJson.detail - .map((err: { msg?: string; loc?: string[] }) => { - const field = err.loc?.join(".") || "unknown"; - return `${field}: ${err.msg || "validation error"}`; - }) - .join("; "); - } else if (typeof errorJson.detail === "string") { - errorDetail = errorJson.detail; - } else { - errorDetail = JSON.stringify(errorJson); - } - } catch { - errorDetail = errorText || `HTTP ${response.status}`; - } - throw new Error(errorDetail); - } - - return response.json(); -} - -// Store processed results for the send() method -const processedAttachments = new Map(); - -// Store image data URLs for attachments (so they persist after File objects are lost) -const imageDataUrls = new Map(); - -/** - * Convert a File to a data URL (base64) for images - */ -async function fileToDataUrl(file: File): Promise { - return new Promise((resolve, reject) => { - const reader = new FileReader(); - reader.onload = () => resolve(reader.result as string); - reader.onerror = reject; - reader.readAsDataURL(file); - }); -} - -/** - * Create the attachment adapter for assistant-ui - * - * This adapter: - * 1. Accepts file upload - * 2. Processes the file through the backend ETL service - * 3. Returns the attachment with extracted markdown content - * - * The content is stored in the attachment and will be sent with the message. - */ -export function createAttachmentAdapter(): AttachmentAdapter { - return { - accept: ACCEPTED_FILE_TYPES, - - /** - * Async generator that yields pending states while processing - * and returns a pending attachment when done. - * - * IMPORTANT: The generator should return status: { type: "running", progress: 100 } - * NOT status: { type: "complete" }. The "complete" status is set by send(). - * Returning "complete" from the generator will prevent send() from being called! - * - * This pattern allows the UI to show a loading indicator - * while the file is being processed by the backend. - * The send() method is called to finalize the attachment. - */ - async *add(input: File | { file: File }): AsyncGenerator { - // Handle both direct File and { file: File } patterns - const file = input instanceof File ? input : input.file; - - if (!file) { - console.error("[AttachmentAdapter] No file found in input:", input); - throw new Error("No file provided"); - } - - // Generate a unique ID for this attachment - const id = crypto.randomUUID(); - - // Determine attachment type from file - const attachmentType = file.type.startsWith("image/") ? "image" : "document"; - - // Yield initial pending state with "running" status (0% progress) - // This triggers the loading indicator in the UI - yield { - id, - type: attachmentType, - name: file.name, - file, - status: { type: "running", reason: "uploading", progress: 0 }, - } as PendingAttachment; - - try { - // For images, convert to data URL so we can display them after persistence - if (attachmentType === "image") { - const dataUrl = await fileToDataUrl(file); - imageDataUrls.set(id, dataUrl); - } - - // Process the file through the backend ETL service - const result = await processAttachment(file); - - // Verify we have the required fields - if (!result.content) { - console.error("[AttachmentAdapter] WARNING: No content received from backend!"); - } - - // Store the processed result for send() - processedAttachments.set(id, result); - - // Create the final pending attachment - // IMPORTANT: Use "running" status with progress: 100 to indicate processing is done - // but attachment is still pending. The "complete" status will be set by send(). - // Yield the final state to ensure it gets processed by the UI - yield { - id, - type: result.type, - name: result.name, - file, - status: { type: "running", reason: "uploading", progress: 100 }, - } as PendingAttachment; - } catch (error) { - console.error("[AttachmentAdapter] Failed to process attachment:", error); - throw error; - } - }, - - /** - * Called when user sends the message. - * Converts the pending attachment to a complete attachment. - */ - async send(pendingAttachment: PendingAttachment): Promise { - const result = processedAttachments.get(pendingAttachment.id); - const imageDataUrl = imageDataUrls.get(pendingAttachment.id); - - if (result) { - // Clean up stored result - processedAttachments.delete(pendingAttachment.id); - if (imageDataUrl) { - imageDataUrls.delete(pendingAttachment.id); - } - - return { - id: result.id, - type: result.type, - name: result.name, - contentType: "text/markdown", - status: { type: "complete" }, - content: [ - { - type: "text", - text: result.content, - }, - ], - extractedContent: result.content, - imageDataUrl, // Store data URL for images so they can be displayed after persistence - }; - } - - // Fallback if no processed result found - console.warn( - "[AttachmentAdapter] send() - No processed result found for attachment:", - pendingAttachment.id - ); - return { - id: pendingAttachment.id, - type: pendingAttachment.type, - name: pendingAttachment.name, - contentType: "text/plain", - status: { type: "complete" }, - content: [], - extractedContent: "", - imageDataUrl, // Still include data URL if available - }; - }, - - async remove() { - // No server-side cleanup needed since we don't persist attachments - }, - }; -} - -/** - * Extract attachment content for chat request - * - * This function extracts the content from attachments to be sent with the chat request. - * Only attachments that have been fully processed (have content) will be included. - */ -export function extractAttachmentContent( - attachments: Array -): Array<{ id: string; name: string; type: string; content: string }> { - return attachments - .filter((att): att is ChatAttachment => { - if (!att || typeof att !== "object") return false; - const a = att as Record; - // Check for our custom extractedContent field first - if (typeof a.extractedContent === "string" && a.extractedContent.length > 0) { - return true; - } - // Fallback: check if content array has text content - if (Array.isArray(a.content)) { - const textContent = (a.content as Array<{ type: string; text?: string }>).find( - (c) => c.type === "text" && typeof c.text === "string" && c.text.length > 0 - ); - return Boolean(textContent); - } - return false; - }) - .map((att) => { - // Get content from extractedContent or from content array - let content = ""; - if (typeof att.extractedContent === "string") { - content = att.extractedContent; - } else if (Array.isArray(att.content)) { - const textContent = (att.content as Array<{ type: string; text?: string }>).find( - (c) => c.type === "text" - ); - content = textContent?.text || ""; - } - - return { - id: att.id, - name: att.name, - type: att.type, - content, - }; - }); -} diff --git a/surfsense_web/lib/chat/message-utils.ts b/surfsense_web/lib/chat/message-utils.ts index 868ed28eb..81538731b 100644 --- a/surfsense_web/lib/chat/message-utils.ts +++ b/surfsense_web/lib/chat/message-utils.ts @@ -1,46 +1,9 @@ import type { ThreadMessageLike } from "@assistant-ui/react"; -import { z } from "zod"; import type { MessageRecord } from "./thread-persistence"; -/** - * Zod schema for persisted attachment info - */ -const PersistedAttachmentSchema = z.object({ - id: z.string(), - name: z.string(), - type: z.string(), - contentType: z.string().optional(), - imageDataUrl: z.string().optional(), - extractedContent: z.string().optional(), -}); - -const AttachmentsPartSchema = z.object({ - type: z.literal("attachments"), - items: z.array(PersistedAttachmentSchema), -}); - -type PersistedAttachment = z.infer; - -/** - * Extract persisted attachments from message content (type-safe with Zod) - */ -function extractPersistedAttachments(content: unknown): PersistedAttachment[] { - if (!Array.isArray(content)) return []; - - for (const part of content) { - const result = AttachmentsPartSchema.safeParse(part); - if (result.success) { - return result.data.items; - } - } - - return []; -} - /** * Convert backend message to assistant-ui ThreadMessageLike format * Filters out 'thinking-steps' part as it's handled separately via messageThinkingSteps - * Restores attachments for user messages from persisted data */ export function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { let content: ThreadMessageLike["content"]; @@ -52,7 +15,7 @@ export function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { const filteredContent = msg.content.filter((part: unknown) => { if (typeof part !== "object" || part === null || !("type" in part)) return true; const partType = (part as { type: string }).type; - // Filter out thinking-steps, mentioned-documents, and attachments + // Filter out metadata parts not directly renderable by assistant-ui return ( partType !== "thinking-steps" && partType !== "mentioned-documents" && @@ -67,25 +30,6 @@ export function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { content = [{ type: "text", text: String(msg.content) }]; } - // Restore attachments for user messages - let attachments: ThreadMessageLike["attachments"]; - if (msg.role === "user") { - const persistedAttachments = extractPersistedAttachments(msg.content); - if (persistedAttachments.length > 0) { - attachments = persistedAttachments.map((att) => ({ - id: att.id, - name: att.name, - type: att.type as "document" | "image" | "file", - contentType: att.contentType || "application/octet-stream", - status: { type: "complete" as const }, - content: [], - // Custom fields for our ChatAttachment interface - imageDataUrl: att.imageDataUrl, - extractedContent: att.extractedContent, - })); - } - } - // Build metadata.custom for author display in shared chats const metadata = msg.author_id ? { @@ -103,7 +47,6 @@ export function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { role: msg.role, content, createdAt: new Date(msg.created_at), - attachments, metadata, }; }