From 8b38660a687f014aa2a32308e1acc7a17b3d59e9 Mon Sep 17 00:00:00 2001 From: arkml Date: Fri, 19 Sep 2025 21:00:25 +0530 Subject: [PATCH] Image upload (#270) * added upload button * image shows up when attached * added automatic parsing of the image * make generate image accept an input image * move image description to debug message * disable message sending if the image processing hasnt completed yet * move to x icon for dismiss * image description processing is stopped on image dismiss * minor changes --- .../app/api/uploaded-images/[id]/route.ts | 75 ++++++++ apps/rowboat/app/api/uploaded-images/route.ts | 85 +++++++++ .../playground/components/chat.tsx | 64 ++++++- .../common/compose-box-playground.tsx | 173 +++++++++++++++++- .../lib/agents-runtime/agent-tools.ts | 146 ++++++++++++++- 5 files changed, 526 insertions(+), 17 deletions(-) create mode 100644 apps/rowboat/app/api/uploaded-images/[id]/route.ts create mode 100644 apps/rowboat/app/api/uploaded-images/route.ts diff --git a/apps/rowboat/app/api/uploaded-images/[id]/route.ts b/apps/rowboat/app/api/uploaded-images/[id]/route.ts new file mode 100644 index 00000000..9f841ba9 --- /dev/null +++ b/apps/rowboat/app/api/uploaded-images/[id]/route.ts @@ -0,0 +1,75 @@ +import { NextRequest, NextResponse } from 'next/server'; +import { S3Client, GetObjectCommand, HeadObjectCommand } from '@aws-sdk/client-s3'; +import { Readable } from 'stream'; + +// Serves uploaded images from S3 by UUID-only path: /api/uploaded-images/{id} +// Reconstructs the S3 key using the same sharding logic as image upload. +export async function GET(request: NextRequest, props: { params: Promise<{ id: string }> }) { + const params = await props.params; + const id = params.id; + if (!id) { + return NextResponse.json({ error: 'Missing id' }, { status: 400 }); + } + + const bucket = process.env.RAG_UPLOADS_S3_BUCKET || ''; + if (!bucket) { + return NextResponse.json({ error: 'S3 bucket not configured' }, { status: 500 }); + } + + const region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1'; + const s3 = new S3Client({ + region, + credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? { + accessKeyId: process.env.AWS_ACCESS_KEY_ID, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY, + } as any : undefined, + }); + + // Reconstruct directory sharding from last two characters of UUID + const last2 = id.slice(-2).padStart(2, '0'); + const dirA = last2.charAt(0); + const dirB = last2.charAt(1); + const baseKey = `uploaded_images/${dirA}/${dirB}/${id}`; + + // Try known extensions in order + const exts = ['.png', '.jpg', '.webp', '.bin']; + let foundExt: string | null = null; + for (const ext of exts) { + try { + await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: `${baseKey}${ext}` })); + foundExt = ext; + break; + } catch { + // continue + } + } + + if (!foundExt) { + return NextResponse.json({ error: 'Not found' }, { status: 404 }); + } + + const key = `${baseKey}${foundExt}`; + const filename = `${id}${foundExt}`; + try { + const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key })); + const contentType = resp.ContentType || 'application/octet-stream'; + const body = resp.Body as any; + const webStream = body?.transformToWebStream + ? body.transformToWebStream() + : (Readable as any)?.toWeb + ? (Readable as any).toWeb(body) + : body; + return new NextResponse(webStream, { + status: 200, + headers: { + 'Content-Type': contentType, + 'Cache-Control': 'public, max-age=31536000, immutable', + 'Content-Disposition': `inline; filename="${filename}"`, + }, + }); + } catch (e) { + console.error('S3 get error', e); + return NextResponse.json({ error: 'Not found' }, { status: 404 }); + } +} + diff --git a/apps/rowboat/app/api/uploaded-images/route.ts b/apps/rowboat/app/api/uploaded-images/route.ts new file mode 100644 index 00000000..46c8d741 --- /dev/null +++ b/apps/rowboat/app/api/uploaded-images/route.ts @@ -0,0 +1,85 @@ +import { NextRequest, NextResponse } from 'next/server'; +import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3'; +import crypto from 'crypto'; +import { tempBinaryCache } from '@/src/application/services/temp-binary-cache'; +import { GoogleGenerativeAI } from '@google/generative-ai'; + +// POST /api/uploaded-images +// Accepts an image file (multipart/form-data, field name: "file") +// Stores it either in S3 (if configured) under uploaded_images///. +// or in the in-memory temp cache. Returns a JSON with a URL that the agent can fetch. +export async function POST(request: NextRequest) { + try { + const contentType = request.headers.get('content-type') || ''; + if (!contentType.includes('multipart/form-data')) { + return NextResponse.json({ error: 'Expected multipart/form-data' }, { status: 400 }); + } + + const form = await request.formData(); + const file = form.get('file') as File | null; + if (!file) { + return NextResponse.json({ error: 'Missing file' }, { status: 400 }); + } + + const arrayBuf = await file.arrayBuffer(); + const buf = Buffer.from(arrayBuf); + const mime = file.type || 'application/octet-stream'; + + // Optionally describe image with Gemini + let descriptionMarkdown: string | null = null; + try { + const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || ''; + if (apiKey) { + const genAI = new GoogleGenerativeAI(apiKey); + const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' }); + const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.'; + const result = await model.generateContent([ + { inlineData: { data: buf.toString('base64'), mimeType: mime } }, + prompt, + ]); + descriptionMarkdown = result.response?.text?.() || null; + } + } catch (e) { + console.warn('Gemini description failed', e); + } + + // If S3 configured, upload there + const s3Bucket = process.env.RAG_UPLOADS_S3_BUCKET || ''; + if (s3Bucket) { + const s3Region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1'; + const s3 = new S3Client({ + region: s3Region, + credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? { + accessKeyId: process.env.AWS_ACCESS_KEY_ID as string, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY as string, + } : undefined, + }); + + const ext = mime === 'image/jpeg' ? '.jpg' : mime === 'image/webp' ? '.webp' : mime === 'image/png' ? '.png' : '.bin'; + const imageId = crypto.randomUUID(); + const last2 = imageId.slice(-2).padStart(2, '0'); + const dirA = last2.charAt(0); + const dirB = last2.charAt(1); + const key = `uploaded_images/${dirA}/${dirB}/${imageId}${ext}`; + + await s3.send(new PutObjectCommand({ + Bucket: s3Bucket, + Key: key, + Body: buf, + ContentType: mime, + })); + + const url = `/api/uploaded-images/${imageId}`; + return NextResponse.json({ url, storage: 's3', id: imageId, mimeType: mime, description: descriptionMarkdown }); + } + + // Otherwise store in temp cache and return temp URL + const ttlSec = 10 * 60; // 10 minutes + const id = tempBinaryCache.put(buf, mime, ttlSec * 1000); + const url = `/api/tmp-images/${id}`; + return NextResponse.json({ url, storage: 'temp', id, mimeType: mime, expiresInSec: ttlSec, description: descriptionMarkdown }); + } catch (e) { + console.error('upload image error', e); + return NextResponse.json({ error: 'Upload failed' }, { status: 500 }); + } +} diff --git a/apps/rowboat/app/projects/[projectId]/playground/components/chat.tsx b/apps/rowboat/app/projects/[projectId]/playground/components/chat.tsx index 11b88aad..d7378377 100644 --- a/apps/rowboat/app/projects/[projectId]/playground/components/chat.tsx +++ b/apps/rowboat/app/projects/[projectId]/playground/components/chat.tsx @@ -152,11 +152,24 @@ export function Chat({ } }, []); - function handleUserMessage(prompt: string) { - const updatedMessages: z.infer[] = [...messages, { - role: 'user', - content: prompt, - }]; + function handleUserMessage(prompt: string, imageDebug?: { url: string; description?: string | null }) { + // Insert an internal-only debug message with image URL/markdown (if provided), + // then the actual user message last so streaming triggers correctly. + const debugMessages: z.infer[] = imageDebug ? [{ + role: 'assistant', + content: `Image Description\n\nURL: ${imageDebug.url}\n\n${imageDebug.description ? imageDebug.description : ''}`.trim(), + agentName: 'Image Description', + responseType: 'internal', + } as any] : []; + + const updatedMessages: z.infer[] = [ + ...messages, + ...debugMessages, + { + role: 'user', + content: prompt, + } as any, + ]; setMessages(updatedMessages); setError(null); setIsLastInteracted(true); @@ -229,9 +242,46 @@ export function Chat({ } // set up a cached turn + // Merge-at-send: if the immediately preceding message is our internal + // Image Description debug message, append its details (URL/markdown) + // to the outgoing user message content, without changing the UI. + const last = messages[messages.length - 1]; + let mergedContent = (typeof last?.content === 'string' ? last.content : '') || ''; + if (messages.length >= 2) { + const prev = messages[messages.length - 2] as any; + const isImageDebug = prev && prev.role === 'assistant' && prev.responseType === 'internal' && prev.agentName === 'Image Description' && typeof prev.content === 'string'; + if (isImageDebug) { + // Expect prev.content to have: "Image Description\n\nURL: \n\n" + // Extract URL and markdown blocks for a clean append + const content = prev.content as string; + let url: string | undefined; + let markdown: string | undefined; + const urlMatch = content.match(/URL:\s*(\S+)/i); + if (urlMatch) url = urlMatch[1]; + // markdown is whatever comes after the blank line following URL + const parts = content.split(/\n\n/); + if (parts.length >= 3) { + markdown = parts.slice(2).join('\n\n').trim(); + } + const appendSections: string[] = []; + if (url) appendSections.push(`The user uploaded an image. URL: ${url}`); + if (markdown) appendSections.push(`Image description (markdown):\n\n${markdown}`); + if (appendSections.length > 0) { + mergedContent = [mergedContent, appendSections.join('\n\n')] + .filter(Boolean) + .join('\n\n'); + } + } + } + + const messagesToSend: z.infer[] = [{ + role: 'user', + content: mergedContent, + } as any]; + const response = await createCachedTurn({ conversationId: conversationId.current, - messages: messages.slice(-1), // only send the last message + messages: messagesToSend, // send merged content only }); if (ignore) { return; @@ -500,4 +550,4 @@ export function Chat({ /> ); -} \ No newline at end of file +} diff --git a/apps/rowboat/components/common/compose-box-playground.tsx b/apps/rowboat/components/common/compose-box-playground.tsx index 2c43a455..fe9f6396 100644 --- a/apps/rowboat/components/common/compose-box-playground.tsx +++ b/apps/rowboat/components/common/compose-box-playground.tsx @@ -3,7 +3,7 @@ import { Textarea } from '@/components/ui/textarea'; import { Button, Spinner } from "@heroui/react"; interface ComposeBoxPlaygroundProps { - handleUserMessage: (message: string) => void; + handleUserMessage: (message: string, imageDebug?: { url: string; description?: string | null }) => void; messages: any[]; loading: boolean; disabled?: boolean; @@ -22,9 +22,12 @@ export function ComposeBoxPlayground({ onCancel, }: ComposeBoxPlaygroundProps) { const [input, setInput] = useState(''); + const [uploading, setUploading] = useState(false); + const [pendingImage, setPendingImage] = useState<{ url?: string; previewSrc?: string; mimeType?: string; description?: string | null } | null>(null); const [isFocused, setIsFocused] = useState(false); const textareaRef = useRef(null); const previousMessagesLength = useRef(messages.length); + const uploadAbortRef = useRef(null); // Handle auto-focus when new messages arrive useEffect(() => { @@ -35,12 +38,27 @@ export function ComposeBoxPlayground({ }, [messages.length, shouldAutoFocus]); function handleInput() { - const prompt = input.trim(); - if (!prompt) { + // Mirror send-button disable rules to block Enter submits + if (disabled || loading || uploading) return; + if (pendingImage?.url && pendingImage.description === undefined) return; + const text = input.trim(); + if (!text && !pendingImage) { return; } + // Only include the user's typed text; omit image URL/markdown from user message + const parts: string[] = []; + if (text) parts.push(text); + const prompt = parts.join('\n\n'); + // Build optional debug payload to render as internal-only message in debug view + const imageDebug = pendingImage?.url + ? { url: pendingImage.url, description: pendingImage.description ?? null } + : undefined; setInput(''); - handleUserMessage(prompt); + if (pendingImage?.previewSrc) { + try { URL.revokeObjectURL(pendingImage.previewSrc); } catch {} + } + setPendingImage(null); + handleUserMessage(prompt, imageDebug); } const handleInputKeyDown = (e: React.KeyboardEvent) => { @@ -55,6 +73,57 @@ export function ComposeBoxPlayground({ onFocus?.(); }; + async function handleImagePicked(file: File) { + if (!file) return; + try { + // Show immediate local preview + const previewSrc = URL.createObjectURL(file); + setPendingImage({ previewSrc }); + setUploading(true); + // Cancel any in-flight request + if (uploadAbortRef.current) { + try { uploadAbortRef.current.abort(); } catch {} + uploadAbortRef.current = null; + } + const controller = new AbortController(); + uploadAbortRef.current = controller; + const form = new FormData(); + form.append('file', file); + const res = await fetch('/api/uploaded-images', { + method: 'POST', + body: form, + signal: controller.signal, + }); + if (!res.ok) { + throw new Error(`Upload failed: ${res.status}`); + } + const data = await res.json(); + const url: string | undefined = data?.url; + if (!url) throw new Error('No URL returned'); + // Only apply state if request wasn't aborted/dismissed + if (uploadAbortRef.current === controller) { + setPendingImage({ url, previewSrc, mimeType: data?.mimeType, description: data?.description ?? null }); + } + } catch (e: any) { + if (e?.name === 'AbortError') { + // Swallow aborts + console.log('Image upload/description aborted'); + } else { + console.error('Image upload failed', e); + alert('Image upload failed. Please try again.'); + } + } finally { + if (uploadAbortRef.current === null) { + // Dismissed earlier; ensure uploading is false + setUploading(false); + } else { + // If this is still the active controller, clear uploading and ref + setUploading(false); + uploadAbortRef.current = null; + } + } + } + return (
{/* Keyboard shortcut hint */} @@ -68,6 +137,33 @@ export function ComposeBoxPlayground({ bg-white dark:bg-[#1e2023] flex items-end gap-2"> {/* Textarea */}
+ {pendingImage && ( +
+ Uploaded image preview + +
+ )}