refactor: enhance chat functionality with new tools and attachment handling

- Updated the system prompt to clarify usage of the display_image tool, emphasizing that it requires a valid public URL and must not be used to display user-uploaded images.
- Introduced the write_todos tool for creating and updating planning lists, with detailed usage instructions and examples.
- Enhanced message handling to persist attachments and mentioned documents so that they survive page reloads.
- Improved UI components to integrate the new write_todos tool and ensure a consistent user experience across chat interactions.
2026-07-16 23:01:06 +02:00 · 2025-12-26 11:38:32 +05:30 · 2025-12-26 11:38:32 +05:30 · 1dd740bb23
commit 1dd740bb23
parent 287fc236ad
3 changed files with 112 additions and 30 deletions
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@ -64,18 +64,23 @@ You have access to the following tools:
  - The preview card will automatically be displayed in the chat.

 4. display_image: Display an image in the chat with metadata.
-  - Use this tool when you want to show an image from a URL to the user.
+  - Use this tool ONLY when you have a valid public HTTP/HTTPS image URL to show.
  - This displays the image with an optional title, description, and source attribution.
-  - Common use cases:
-    * Showing an image from a URL mentioned in the conversation
-    * Displaying a diagram, chart, or illustration you're referencing
-    * Showing visual examples when explaining concepts
-  - IMPORTANT: Do NOT use this tool for user-uploaded image attachments!
-    * User attachments are already visible in the chat UI - the user can see them
-    * This tool requires a valid HTTP/HTTPS URL, not a local file path
-    * When a user uploads an image, just analyze it and respond - don't try to display it again
+  - Valid use cases:
+    * Showing an image from a URL the user explicitly mentioned in their message
+    * Displaying images found in scraped webpage content (from scrape_webpage tool)
+    * Showing a publicly accessible diagram or chart from a known URL
+  
+  CRITICAL - NEVER USE THIS TOOL FOR USER-UPLOADED ATTACHMENTS:
+  When a user uploads/attaches an image file to their message:
+    * The image is ALREADY VISIBLE in the chat UI as a thumbnail on their message
+    * You do NOT have a URL for their uploaded image - only extracted text/description
+    * Calling display_image will FAIL and show "Image not available" error
+    * Simply analyze the image content and respond with your analysis - DO NOT try to display it
+    * The user can already see their own uploaded image - they don't need you to show it again
+  
  - Args:
-    - src: The URL of the image to display (must be a valid HTTP/HTTPS image URL, not a local path)
+    - src: The URL of the image (MUST be a valid public HTTP/HTTPS URL that you know exists)
    - alt: Alternative text describing the image (for accessibility)
    - title: Optional title to display below the image
    - description: Optional description providing context about the image
@ -134,8 +139,15 @@ You have access to the following tools:
 - User: "Show me this image: https://example.com/image.png"
  - Call: `display_image(src="https://example.com/image.png", alt="User shared image")`

- User: "Can you display a diagram of a neural network?"
-  - Call: `display_image(src="https://example.com/neural-network.png", alt="Neural network diagram", title="Neural Network Architecture", description="A visual representation of a neural network with input, hidden, and output layers")`
+- User uploads an image file and asks: "What is this image about?"
+  - DO NOT call display_image! The user's uploaded image is already visible in the chat.
+  - Simply analyze the image content (which you receive as extracted text/description) and respond.
+  - WRONG: `display_image(src="...", ...)` - This will fail with "Image not available"
+  - CORRECT: Just provide your analysis directly: "Based on the image you shared, this appears to be..."
+
+- User uploads a screenshot and asks: "Can you explain what's in this image?"
+  - DO NOT call display_image! Just analyze and respond directly.
+  - The user can already see their screenshot - they don't need you to display it again.

 - User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
  - Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@ -86,9 +86,44 @@ function extractMentionedDocuments(content: unknown): MentionedDocumentInfo[] {
 	return [];
 }

+/**
+ * Zod schema for persisted attachment info
+ */
+const PersistedAttachmentSchema = z.object({
+	id: z.string(),
+	name: z.string(),
+	type: z.string(),
+	imageDataUrl: z.string().optional(),
+	extractedContent: z.string().optional(),
+});
+
+const AttachmentsPartSchema = z.object({
+	type: z.literal("attachments"),
+	items: z.array(PersistedAttachmentSchema),
+});
+
+type PersistedAttachment = z.infer<typeof PersistedAttachmentSchema>;
+
+/**
+ * Extract persisted attachments from message content (type-safe with Zod)
+ */
+function extractPersistedAttachments(content: unknown): PersistedAttachment[] {
+	if (!Array.isArray(content)) return [];
+
+	for (const part of content) {
+		const result = AttachmentsPartSchema.safeParse(part);
+		if (result.success) {
+			return result.data.items;
+		}
+	}
+
+	return [];
+}
+
 /**
 * Convert backend message to assistant-ui ThreadMessageLike format
 * Filters out 'thinking-steps' part as it's handled separately via messageThinkingSteps
+ * Restores attachments for user messages from persisted data
 */
 function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
 	let content: ThreadMessageLike["content"];
@ -100,8 +135,8 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
 		const filteredContent = msg.content.filter((part: unknown) => {
 			if (typeof part !== "object" || part === null || !("type" in part)) return true;
 			const partType = (part as { type: string }).type;
-			// Filter out thinking-steps and mentioned-documents
-			return partType !== "thinking-steps" && partType !== "mentioned-documents";
+			// Filter out thinking-steps, mentioned-documents, and attachments
+			return partType !== "thinking-steps" && partType !== "mentioned-documents" && partType !== "attachments";
 		});
 		content =
 			filteredContent.length > 0
@ -111,11 +146,30 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
 		content = [{ type: "text", text: String(msg.content) }];
 	}

+	// Restore attachments for user messages
+	let attachments: ThreadMessageLike["attachments"];
+	if (msg.role === "user") {
+		const persistedAttachments = extractPersistedAttachments(msg.content);
+		if (persistedAttachments.length > 0) {
+			attachments = persistedAttachments.map((att) => ({
+				id: att.id,
+				name: att.name,
+				type: att.type as "document" | "image" | "file",
+				status: { type: "complete" as const },
+				content: [],
+				// Custom fields for our ChatAttachment interface
+				imageDataUrl: att.imageDataUrl,
+				extractedContent: att.extractedContent,
+			}));
+		}
+	}
+
 	return {
 		id: `msg-${msg.id}`,
 		role: msg.role,
 		content,
 		createdAt: new Date(msg.created_at),
+		attachments,
 	};
 }

@ -348,21 +402,37 @@ export default function NewChatPage() {
 				}));
 			}

-			// Persist user message with mentioned documents (don't await, fire and forget)
-			const persistContent =
-				mentionedDocuments.length > 0
-					? [
-							...message.content,
-							{
-								type: "mentioned-documents",
-								documents: mentionedDocuments.map((doc) => ({
-									id: doc.id,
-									title: doc.title,
-									document_type: doc.document_type,
-								})),
-							},
-						]
-					: message.content;
+			// Persist user message with mentioned documents and attachments (don't await, fire and forget)
+			const persistContent: unknown[] = [...message.content];
+
+			// Add mentioned documents for persistence
+			if (mentionedDocuments.length > 0) {
+				persistContent.push({
+					type: "mentioned-documents",
+					documents: mentionedDocuments.map((doc) => ({
+						id: doc.id,
+						title: doc.title,
+						document_type: doc.document_type,
+					})),
+				});
+			}
+
+			// Add attachments for persistence (so they survive page reload)
+			if (message.attachments && message.attachments.length > 0) {
+				persistContent.push({
+					type: "attachments",
+					items: message.attachments.map((att) => ({
+						id: att.id,
+						name: att.name,
+						type: att.type,
+						// Include imageDataUrl for images so they can be displayed after reload
+						imageDataUrl: (att as { imageDataUrl?: string }).imageDataUrl,
+						// Include extractedContent for context (already extracted, no re-processing needed)
+						extractedContent: (att as { extractedContent?: string }).extractedContent,
+					})),
+				});
+			}
+
 			appendMessage(currentThreadId, {
 				role: "user",
 				content: persistContent,
--- a/surfsense_web/components/assistant-ui/thread.tsx
+++ b/surfsense_web/components/assistant-ui/thread.tsx
@ -571,7 +571,7 @@ const Composer: FC = () => {
 				<div ref={editorContainerRef} className="aui-composer-input-wrapper px-3 pt-3 pb-6">
 					<InlineMentionEditor
 						ref={editorRef}
-						placeholder="Ask SurfSense (type @ to mention docs)"
+						placeholder="Ask SurfSense or @mention docs"
 						onMentionTrigger={handleMentionTrigger}
 						onMentionClose={handleMentionClose}
 						onChange={handleEditorChange}