diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 61a8fbdd6..3c5ab55a0 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -64,18 +64,23 @@ You have access to the following tools: - The preview card will automatically be displayed in the chat. 4. display_image: Display an image in the chat with metadata. - - Use this tool when you want to show an image from a URL to the user. + - Use this tool ONLY when you have a valid public HTTP/HTTPS image URL to show. - This displays the image with an optional title, description, and source attribution. - - Common use cases: - * Showing an image from a URL mentioned in the conversation - * Displaying a diagram, chart, or illustration you're referencing - * Showing visual examples when explaining concepts - - IMPORTANT: Do NOT use this tool for user-uploaded image attachments! - * User attachments are already visible in the chat UI - the user can see them - * This tool requires a valid HTTP/HTTPS URL, not a local file path - * When a user uploads an image, just analyze it and respond - don't try to display it again + - Valid use cases: + * Showing an image from a URL the user explicitly mentioned in their message + * Displaying images found in scraped webpage content (from scrape_webpage tool) + * Showing a publicly accessible diagram or chart from a known URL + + CRITICAL - NEVER USE THIS TOOL FOR USER-UPLOADED ATTACHMENTS: + When a user uploads/attaches an image file to their message: + * The image is ALREADY VISIBLE in the chat UI as a thumbnail on their message + * You do NOT have a URL for their uploaded image - only extracted text/description + * Calling display_image will FAIL and show "Image not available" error + * Simply analyze the image content and respond with your analysis - DO NOT try to display it + * The user can already see their own uploaded image - 
they don't need you to show it again + - Args: - - src: The URL of the image to display (must be a valid HTTP/HTTPS image URL, not a local path) + - src: The URL of the image (MUST be a valid public HTTP/HTTPS URL that you know exists) - alt: Alternative text describing the image (for accessibility) - title: Optional title to display below the image - description: Optional description providing context about the image @@ -134,8 +139,15 @@ You have access to the following tools: - User: "Show me this image: https://example.com/image.png" - Call: `display_image(src="https://example.com/image.png", alt="User shared image")` -- User: "Can you display a diagram of a neural network?" - - Call: `display_image(src="https://example.com/neural-network.png", alt="Neural network diagram", title="Neural Network Architecture", description="A visual representation of a neural network with input, hidden, and output layers")` +- User uploads an image file and asks: "What is this image about?" + - DO NOT call display_image! The user's uploaded image is already visible in the chat. + - Simply analyze the image content (which you receive as extracted text/description) and respond. + - WRONG: `display_image(src="...", ...)` - This will fail with "Image not available" + - CORRECT: Just provide your analysis directly: "Based on the image you shared, this appears to be..." + +- User uploads a screenshot and asks: "Can you explain what's in this image?" + - DO NOT call display_image! Just analyze and respond directly. + - The user can already see their screenshot - they don't need you to display it again. 
- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends" - Call: `scrape_webpage(url="https://example.com/blog/ai-trends")` diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 722973067..20b71dd63 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -86,9 +86,44 @@ function extractMentionedDocuments(content: unknown): MentionedDocumentInfo[] { return []; } +/** + * Zod schema for persisted attachment info + */ +const PersistedAttachmentSchema = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), + imageDataUrl: z.string().optional(), + extractedContent: z.string().optional(), +}); + +const AttachmentsPartSchema = z.object({ + type: z.literal("attachments"), + items: z.array(PersistedAttachmentSchema), +}); + +type PersistedAttachment = z.infer<typeof PersistedAttachmentSchema>; + +/** + * Extract persisted attachments from message content (type-safe with Zod) + */ +function extractPersistedAttachments(content: unknown): PersistedAttachment[] { + if (!Array.isArray(content)) return []; + + for (const part of content) { + const result = AttachmentsPartSchema.safeParse(part); + if (result.success) { + return result.data.items; + } + } + + return []; +} + /** * Convert backend message to assistant-ui ThreadMessageLike format * Filters out 'thinking-steps' part as it's handled separately via messageThinkingSteps + * Restores attachments for user messages from persisted data */ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { let content: ThreadMessageLike["content"]; @@ -100,8 +135,8 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { const filteredContent = msg.content.filter((part: unknown) => { if (typeof part !== "object" || part === null || !("type" in 
part)) return true; const partType = (part as { type: string }).type; - // Filter out thinking-steps and mentioned-documents - return partType !== "thinking-steps" && partType !== "mentioned-documents"; + // Filter out thinking-steps, mentioned-documents, and attachments + return partType !== "thinking-steps" && partType !== "mentioned-documents" && partType !== "attachments"; }); content = filteredContent.length > 0 @@ -111,11 +146,30 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { content = [{ type: "text", text: String(msg.content) }]; } + // Restore attachments for user messages + let attachments: ThreadMessageLike["attachments"]; + if (msg.role === "user") { + const persistedAttachments = extractPersistedAttachments(msg.content); + if (persistedAttachments.length > 0) { + attachments = persistedAttachments.map((att) => ({ + id: att.id, + name: att.name, + type: att.type as "document" | "image" | "file", + status: { type: "complete" as const }, + content: [], + // Custom fields for our ChatAttachment interface + imageDataUrl: att.imageDataUrl, + extractedContent: att.extractedContent, + })); + } + } + return { id: `msg-${msg.id}`, role: msg.role, content, createdAt: new Date(msg.created_at), + attachments, }; } @@ -348,21 +402,37 @@ export default function NewChatPage() { })); } - // Persist user message with mentioned documents (don't await, fire and forget) - const persistContent = - mentionedDocuments.length > 0 - ? 
[ - ...message.content, - { - type: "mentioned-documents", - documents: mentionedDocuments.map((doc) => ({ - id: doc.id, - title: doc.title, - document_type: doc.document_type, - })), - }, - ] - : message.content; + // Persist user message with mentioned documents and attachments (don't await, fire and forget) + const persistContent: unknown[] = [...message.content]; + + // Add mentioned documents for persistence + if (mentionedDocuments.length > 0) { + persistContent.push({ + type: "mentioned-documents", + documents: mentionedDocuments.map((doc) => ({ + id: doc.id, + title: doc.title, + document_type: doc.document_type, + })), + }); + } + + // Add attachments for persistence (so they survive page reload) + if (message.attachments && message.attachments.length > 0) { + persistContent.push({ + type: "attachments", + items: message.attachments.map((att) => ({ + id: att.id, + name: att.name, + type: att.type, + // Include imageDataUrl for images so they can be displayed after reload + imageDataUrl: (att as { imageDataUrl?: string }).imageDataUrl, + // Include extractedContent for context (already extracted, no re-processing needed) + extractedContent: (att as { extractedContent?: string }).extractedContent, + })), + }); + } + appendMessage(currentThreadId, { role: "user", content: persistContent, diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index fb96e4d7a..fa633a55a 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -571,7 +571,7 @@ const Composer: FC = () => {