mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-05-25 18:55:19 +02:00
added automatic parsing of the image
This commit is contained in:
parent
4c31912eaf
commit
61254bfc12
2 changed files with 39 additions and 7 deletions
|
|
@ -2,6 +2,7 @@ import { NextRequest, NextResponse } from 'next/server';
|
||||||
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3';
|
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3';
|
||||||
import crypto from 'crypto';
|
import crypto from 'crypto';
|
||||||
import { tempBinaryCache } from '@/src/application/services/temp-binary-cache';
|
import { tempBinaryCache } from '@/src/application/services/temp-binary-cache';
|
||||||
|
import { GoogleGenerativeAI } from '@google/generative-ai';
|
||||||
|
|
||||||
// POST /api/uploaded-images
|
// POST /api/uploaded-images
|
||||||
// Accepts an image file (multipart/form-data, field name: "file")
|
// Accepts an image file (multipart/form-data, field name: "file")
|
||||||
|
|
@ -24,6 +25,24 @@ export async function POST(request: NextRequest) {
|
||||||
const buf = Buffer.from(arrayBuf);
|
const buf = Buffer.from(arrayBuf);
|
||||||
const mime = file.type || 'application/octet-stream';
|
const mime = file.type || 'application/octet-stream';
|
||||||
|
|
||||||
|
// Optionally describe image with Gemini
|
||||||
|
let descriptionMarkdown: string | null = null;
|
||||||
|
try {
|
||||||
|
const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
|
||||||
|
if (apiKey) {
|
||||||
|
const genAI = new GoogleGenerativeAI(apiKey);
|
||||||
|
const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
|
||||||
|
const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';
|
||||||
|
const result = await model.generateContent([
|
||||||
|
{ inlineData: { data: buf.toString('base64'), mimeType: mime } },
|
||||||
|
prompt,
|
||||||
|
]);
|
||||||
|
descriptionMarkdown = result.response?.text?.() || null;
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.warn('Gemini description failed', e);
|
||||||
|
}
|
||||||
|
|
||||||
// If S3 configured, upload there
|
// If S3 configured, upload there
|
||||||
const s3Bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
|
const s3Bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
|
||||||
if (s3Bucket) {
|
if (s3Bucket) {
|
||||||
|
|
@ -51,17 +70,16 @@ export async function POST(request: NextRequest) {
|
||||||
}));
|
}));
|
||||||
|
|
||||||
const url = `/api/uploaded-images/${imageId}`;
|
const url = `/api/uploaded-images/${imageId}`;
|
||||||
return NextResponse.json({ url, storage: 's3', id: imageId, mimeType: mime });
|
return NextResponse.json({ url, storage: 's3', id: imageId, mimeType: mime, description: descriptionMarkdown });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise store in temp cache and return temp URL
|
// Otherwise store in temp cache and return temp URL
|
||||||
const ttlSec = 10 * 60; // 10 minutes
|
const ttlSec = 10 * 60; // 10 minutes
|
||||||
const id = tempBinaryCache.put(buf, mime, ttlSec * 1000);
|
const id = tempBinaryCache.put(buf, mime, ttlSec * 1000);
|
||||||
const url = `/api/tmp-images/${id}`;
|
const url = `/api/tmp-images/${id}`;
|
||||||
return NextResponse.json({ url, storage: 'temp', id, mimeType: mime, expiresInSec: ttlSec });
|
return NextResponse.json({ url, storage: 'temp', id, mimeType: mime, expiresInSec: ttlSec, description: descriptionMarkdown });
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('upload image error', e);
|
console.error('upload image error', e);
|
||||||
return NextResponse.json({ error: 'Upload failed' }, { status: 500 });
|
return NextResponse.json({ error: 'Upload failed' }, { status: 500 });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ export function ComposeBoxPlayground({
|
||||||
}: ComposeBoxPlaygroundProps) {
|
}: ComposeBoxPlaygroundProps) {
|
||||||
const [input, setInput] = useState('');
|
const [input, setInput] = useState('');
|
||||||
const [uploading, setUploading] = useState(false);
|
const [uploading, setUploading] = useState(false);
|
||||||
const [pendingImage, setPendingImage] = useState<{ url: string; mimeType?: string } | null>(null);
|
const [pendingImage, setPendingImage] = useState<{ url?: string; previewSrc?: string; mimeType?: string; description?: string | null } | null>(null);
|
||||||
const [isFocused, setIsFocused] = useState(false);
|
const [isFocused, setIsFocused] = useState(false);
|
||||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||||
const previousMessagesLength = useRef(messages.length);
|
const previousMessagesLength = useRef(messages.length);
|
||||||
|
|
@ -45,9 +45,15 @@ export function ComposeBoxPlayground({
|
||||||
if (text) parts.push(text);
|
if (text) parts.push(text);
|
||||||
if (pendingImage?.url) {
|
if (pendingImage?.url) {
|
||||||
parts.push(`The user uploaded an image. URL: ${pendingImage.url}`);
|
parts.push(`The user uploaded an image. URL: ${pendingImage.url}`);
|
||||||
|
if (pendingImage.description) {
|
||||||
|
parts.push(`Image description (markdown):\n\n${pendingImage.description}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const prompt = parts.join('\n\n');
|
const prompt = parts.join('\n\n');
|
||||||
setInput('');
|
setInput('');
|
||||||
|
if (pendingImage?.previewSrc) {
|
||||||
|
try { URL.revokeObjectURL(pendingImage.previewSrc); } catch {}
|
||||||
|
}
|
||||||
setPendingImage(null);
|
setPendingImage(null);
|
||||||
handleUserMessage(prompt);
|
handleUserMessage(prompt);
|
||||||
}
|
}
|
||||||
|
|
@ -67,6 +73,9 @@ export function ComposeBoxPlayground({
|
||||||
async function handleImagePicked(file: File) {
|
async function handleImagePicked(file: File) {
|
||||||
if (!file) return;
|
if (!file) return;
|
||||||
try {
|
try {
|
||||||
|
// Show immediate local preview
|
||||||
|
const previewSrc = URL.createObjectURL(file);
|
||||||
|
setPendingImage({ previewSrc });
|
||||||
setUploading(true);
|
setUploading(true);
|
||||||
const form = new FormData();
|
const form = new FormData();
|
||||||
form.append('file', file);
|
form.append('file', file);
|
||||||
|
|
@ -80,7 +89,7 @@ export function ComposeBoxPlayground({
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
const url: string | undefined = data?.url;
|
const url: string | undefined = data?.url;
|
||||||
if (!url) throw new Error('No URL returned');
|
if (!url) throw new Error('No URL returned');
|
||||||
setPendingImage({ url, mimeType: data?.mimeType });
|
setPendingImage({ url, previewSrc, mimeType: data?.mimeType, description: data?.description });
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('Image upload failed', e);
|
console.error('Image upload failed', e);
|
||||||
alert('Image upload failed. Please try again.');
|
alert('Image upload failed. Please try again.');
|
||||||
|
|
@ -124,14 +133,19 @@ export function ComposeBoxPlayground({
|
||||||
{pendingImage && (
|
{pendingImage && (
|
||||||
<div className="mb-2 flex items-center gap-2">
|
<div className="mb-2 flex items-center gap-2">
|
||||||
<img
|
<img
|
||||||
src={pendingImage.url}
|
src={pendingImage.previewSrc || pendingImage.url}
|
||||||
alt="Uploaded image preview"
|
alt="Uploaded image preview"
|
||||||
className="w-16 h-16 object-cover rounded border border-gray-200 dark:border-gray-700"
|
className="w-16 h-16 object-cover rounded border border-gray-200 dark:border-gray-700"
|
||||||
/>
|
/>
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
className="text-xs px-2 py-1 rounded bg-gray-100 hover:bg-gray-200 dark:bg-gray-800 dark:hover:bg-gray-700 text-gray-700 dark:text-gray-200 border border-gray-200 dark:border-gray-700"
|
className="text-xs px-2 py-1 rounded bg-gray-100 hover:bg-gray-200 dark:bg-gray-800 dark:hover:bg-gray-700 text-gray-700 dark:text-gray-200 border border-gray-200 dark:border-gray-700"
|
||||||
onClick={() => setPendingImage(null)}
|
onClick={() => {
|
||||||
|
if (pendingImage?.previewSrc) {
|
||||||
|
try { URL.revokeObjectURL(pendingImage.previewSrc); } catch {}
|
||||||
|
}
|
||||||
|
setPendingImage(null);
|
||||||
|
}}
|
||||||
>
|
>
|
||||||
Dismiss
|
Dismiss
|
||||||
</button>
|
</button>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue