From 61254bfc1281a53fd6cde0fbd36f6efe31c405bf Mon Sep 17 00:00:00 2001 From: arkml Date: Fri, 19 Sep 2025 15:17:37 +0530 Subject: [PATCH] added automatic parsing of the image --- apps/rowboat/app/api/uploaded-images/route.ts | 24 ++++++++++++++++--- .../common/compose-box-playground.tsx | 22 +++++++++++++---- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/apps/rowboat/app/api/uploaded-images/route.ts b/apps/rowboat/app/api/uploaded-images/route.ts index 13ec14d5..46c8d741 100644 --- a/apps/rowboat/app/api/uploaded-images/route.ts +++ b/apps/rowboat/app/api/uploaded-images/route.ts @@ -2,6 +2,7 @@ import { NextRequest, NextResponse } from 'next/server'; import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3'; import crypto from 'crypto'; import { tempBinaryCache } from '@/src/application/services/temp-binary-cache'; +import { GoogleGenerativeAI } from '@google/generative-ai'; // POST /api/uploaded-images // Accepts an image file (multipart/form-data, field name: "file") @@ -24,6 +25,24 @@ export async function POST(request: NextRequest) { const buf = Buffer.from(arrayBuf); const mime = file.type || 'application/octet-stream'; + // Optionally describe image with Gemini + let descriptionMarkdown: string | null = null; + try { + const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || ''; + if (apiKey) { + const genAI = new GoogleGenerativeAI(apiKey); + const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' }); + const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.'; + const result = await model.generateContent([ + { inlineData: { data: buf.toString('base64'), mimeType: mime } }, + prompt, + ]); + descriptionMarkdown = result.response?.text?.() || null; + } + } catch (e) { + console.warn('Gemini description failed', e); + } + // If S3 configured, upload there const s3Bucket = process.env.RAG_UPLOADS_S3_BUCKET || ''; if (s3Bucket) { @@ -51,17 +70,16 @@ export async function POST(request: NextRequest) { })); const url = `/api/uploaded-images/${imageId}`; - return NextResponse.json({ url, storage: 's3', id: imageId, mimeType: mime }); + return NextResponse.json({ url, storage: 's3', id: imageId, mimeType: mime, description: descriptionMarkdown }); } // Otherwise store in temp cache and return temp URL const ttlSec = 10 * 60; // 10 minutes const id = tempBinaryCache.put(buf, mime, ttlSec * 1000); const url = `/api/tmp-images/${id}`; - return NextResponse.json({ url, storage: 'temp', id, mimeType: mime, expiresInSec: ttlSec }); + return NextResponse.json({ url, storage: 'temp', id, mimeType: mime, expiresInSec: ttlSec, description: descriptionMarkdown }); } catch (e) { console.error('upload image error', e); return NextResponse.json({ error: 'Upload failed' }, { status: 500 }); } } - diff --git a/apps/rowboat/components/common/compose-box-playground.tsx b/apps/rowboat/components/common/compose-box-playground.tsx index e6fa2abe..e950a43f 100644 --- a/apps/rowboat/components/common/compose-box-playground.tsx +++ b/apps/rowboat/components/common/compose-box-playground.tsx @@ -23,7 +23,7 @@ export function ComposeBoxPlayground({ }: ComposeBoxPlaygroundProps) { const [input, setInput] = useState(''); const [uploading, setUploading] = useState(false); - const [pendingImage, setPendingImage] = useState<{ url: string; mimeType?: string } | null>(null); + const [pendingImage, setPendingImage] = useState<{ url?: string; previewSrc?: string; mimeType?: string; description?: string | null } | null>(null); const [isFocused, setIsFocused] = useState(false); const textareaRef = useRef(null); const previousMessagesLength = useRef(messages.length); @@ -45,9 +45,15 @@ export function ComposeBoxPlayground({ if (text) parts.push(text); if (pendingImage?.url) { parts.push(`The user uploaded an image. URL: ${pendingImage.url}`); + if (pendingImage.description) { + parts.push(`Image description (markdown):\n\n${pendingImage.description}`); + } } const prompt = parts.join('\n\n'); setInput(''); + if (pendingImage?.previewSrc) { + try { URL.revokeObjectURL(pendingImage.previewSrc); } catch {} + } setPendingImage(null); handleUserMessage(prompt); } @@ -67,6 +73,9 @@ export function ComposeBoxPlayground({ async function handleImagePicked(file: File) { if (!file) return; try { + // Show immediate local preview + const previewSrc = URL.createObjectURL(file); + setPendingImage({ previewSrc }); setUploading(true); const form = new FormData(); form.append('file', file); @@ -80,7 +89,7 @@ export function ComposeBoxPlayground({ const data = await res.json(); const url: string | undefined = data?.url; if (!url) throw new Error('No URL returned'); - setPendingImage({ url, mimeType: data?.mimeType }); + setPendingImage({ url, previewSrc, mimeType: data?.mimeType, description: data?.description }); } catch (e) { console.error('Image upload failed', e); alert('Image upload failed. Please try again.'); @@ -124,14 +133,19 @@ export function ComposeBoxPlayground({ {pendingImage && (
Uploaded image preview