mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-05-04 21:02:39 +02:00
Image upload (#270)
* added upload button * image shows up when attached * added automatic parsing of the image * make generate image accept an input image * move image description to debug message * disable message sending if the image processing hasn't completed yet * move to x icon for dismiss * image description processing is stopped on image dismiss * minor changes
This commit is contained in:
parent
109997ca2e
commit
8b38660a68
5 changed files with 526 additions and 17 deletions
75
apps/rowboat/app/api/uploaded-images/[id]/route.ts
Normal file
75
apps/rowboat/app/api/uploaded-images/[id]/route.ts
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { S3Client, GetObjectCommand, HeadObjectCommand } from '@aws-sdk/client-s3';
|
||||
import { Readable } from 'stream';
|
||||
|
||||
// Serves uploaded images from S3 by UUID-only path: /api/uploaded-images/{id}
|
||||
// Reconstructs the S3 key using the same sharding logic as image upload.
|
||||
/**
 * GET /api/uploaded-images/{id}
 *
 * Streams a previously uploaded image out of S3. The object key is rebuilt
 * from the id alone as `uploaded_images/<a>/<b>/<id><ext>`, where `<a>` and
 * `<b>` are the last two characters of the id and `<ext>` is discovered by
 * probing the known extensions with HEAD requests.
 *
 * Responses:
 *  - 400 when the id is missing
 *  - 500 when RAG_UPLOADS_S3_BUCKET is not set
 *  - 404 when the id is not UUID-shaped, no object exists for any known
 *    extension, or the object cannot be read
 *  - 200 with the object body otherwise
 *
 * @param request Incoming request (unused; required by the route signature).
 * @param props   Route props whose `params` promise resolves to `{ id }`.
 */
export async function GET(request: NextRequest, props: { params: Promise<{ id: string }> }) {
    const { id } = await props.params;
    if (!id) {
        return NextResponse.json({ error: 'Missing id' }, { status: 400 });
    }

    // The id is attacker-controlled and ends up in both the S3 key and the
    // Content-Disposition header. Upload ids are minted with
    // crypto.randomUUID(), so anything that is not UUID-shaped cannot refer
    // to a stored object; answer exactly like a miss without touching S3.
    const uuidShape = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
    if (!uuidShape.test(id)) {
        return NextResponse.json({ error: 'Not found' }, { status: 404 });
    }

    const bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
    if (!bucket) {
        return NextResponse.json({ error: 'S3 bucket not configured' }, { status: 500 });
    }

    const region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1';
    const s3 = new S3Client({
        region,
        // Explicit credentials only when both variables are present; otherwise
        // the SDK falls back to its default provider chain.
        credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? {
            accessKeyId: process.env.AWS_ACCESS_KEY_ID,
            secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
        } as any : undefined,
    });

    // Reconstruct directory sharding from last two characters of UUID
    const last2 = id.slice(-2).padStart(2, '0');
    const dirA = last2.charAt(0);
    const dirB = last2.charAt(1);
    const baseKey = `uploaded_images/${dirA}/${dirB}/${id}`;

    // Try known extensions in order; the first HEAD that succeeds wins.
    const exts = ['.png', '.jpg', '.webp', '.bin'];
    let foundExt: string | null = null;
    for (const ext of exts) {
        try {
            await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: `${baseKey}${ext}` }));
            foundExt = ext;
            break;
        } catch {
            // Best-effort probe: any failure is treated as "not this extension".
        }
    }

    if (!foundExt) {
        return NextResponse.json({ error: 'Not found' }, { status: 404 });
    }

    const key = `${baseKey}${foundExt}`;
    const filename = `${id}${foundExt}`;
    try {
        const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
        const contentType = resp.ContentType || 'application/octet-stream';
        const body = resp.Body as any;

        // Prefer the SDK's own web-stream conversion, then Node's
        // Readable.toWeb, and finally hand the body over unchanged.
        let webStream = body;
        if (body?.transformToWebStream) {
            webStream = body.transformToWebStream();
        } else if ((Readable as any)?.toWeb) {
            webStream = (Readable as any).toWeb(body);
        }

        return new NextResponse(webStream, {
            status: 200,
            headers: {
                'Content-Type': contentType,
                'Cache-Control': 'public, max-age=31536000, immutable',
                'Content-Disposition': `inline; filename="${filename}"`,
            },
        });
    } catch (e) {
        console.error('S3 get error', e);
        return NextResponse.json({ error: 'Not found' }, { status: 404 });
    }
}
|
||||
|
||||
85
apps/rowboat/app/api/uploaded-images/route.ts
Normal file
85
apps/rowboat/app/api/uploaded-images/route.ts
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3';
|
||||
import crypto from 'crypto';
|
||||
import { tempBinaryCache } from '@/src/application/services/temp-binary-cache';
|
||||
import { GoogleGenerativeAI } from '@google/generative-ai';
|
||||
|
||||
// POST /api/uploaded-images
|
||||
// Accepts an image file (multipart/form-data, field name: "file")
|
||||
// Stores it either in S3 (if configured) under uploaded_images/<a>/<b>/<uuid>.<ext>
|
||||
// or in the in-memory temp cache. Returns a JSON with a URL that the agent can fetch.
|
||||
/**
 * POST /api/uploaded-images
 *
 * Accepts a multipart/form-data body whose "file" field is an uploaded file.
 * When a Google/Gemini API key is configured, a Markdown description of the
 * image is requested first (failures are logged and ignored). The bytes are
 * then stored in S3 under `uploaded_images/<a>/<b>/<uuid><ext>` when
 * RAG_UPLOADS_S3_BUCKET is set, or in the in-memory temp cache otherwise.
 *
 * Responses:
 *  - 400 when the body is not multipart/form-data or has no file in "file"
 *  - 500 ("Upload failed") on any unexpected error
 *  - 200 JSON `{ url, storage, id, mimeType, description }`, plus
 *    `expiresInSec` for temp-cache storage
 *
 * @param request Incoming multipart request.
 */
export async function POST(request: NextRequest) {
    try {
        const contentType = request.headers.get('content-type') || '';
        if (!contentType.includes('multipart/form-data')) {
            return NextResponse.json({ error: 'Expected multipart/form-data' }, { status: 400 });
        }

        const form = await request.formData();
        // FormData entries are either File objects or plain strings. A text
        // field named "file" is not an upload; treat it as missing instead of
        // letting `.arrayBuffer()` throw and surface as a 500.
        const entry = form.get('file');
        if (!entry || typeof entry === 'string') {
            return NextResponse.json({ error: 'Missing file' }, { status: 400 });
        }
        const file = entry;

        const arrayBuf = await file.arrayBuffer();
        const buf = Buffer.from(arrayBuf);
        const mime = file.type || 'application/octet-stream';

        // Optionally describe image with Gemini
        let descriptionMarkdown: string | null = null;
        try {
            const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
            if (apiKey) {
                const genAI = new GoogleGenerativeAI(apiKey);
                const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
                const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';
                const result = await model.generateContent([
                    { inlineData: { data: buf.toString('base64'), mimeType: mime } },
                    prompt,
                ]);
                descriptionMarkdown = result.response?.text?.() || null;
            }
        } catch (e) {
            // The description is optional; the upload proceeds without it.
            console.warn('Gemini description failed', e);
        }

        // If S3 configured, upload there
        const s3Bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
        if (s3Bucket) {
            const s3Region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1';
            const s3 = new S3Client({
                region: s3Region,
                credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? {
                    accessKeyId: process.env.AWS_ACCESS_KEY_ID as string,
                    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY as string,
                } : undefined,
            });

            // Extension is derived from the MIME type; unrecognised types are
            // stored as ".bin".
            const ext = mime === 'image/jpeg' ? '.jpg' : mime === 'image/webp' ? '.webp' : mime === 'image/png' ? '.png' : '.bin';
            const imageId = crypto.randomUUID();
            // Shard by the last two characters of the id.
            const last2 = imageId.slice(-2).padStart(2, '0');
            const dirA = last2.charAt(0);
            const dirB = last2.charAt(1);
            const key = `uploaded_images/${dirA}/${dirB}/${imageId}${ext}`;

            await s3.send(new PutObjectCommand({
                Bucket: s3Bucket,
                Key: key,
                Body: buf,
                ContentType: mime,
            }));

            const url = `/api/uploaded-images/${imageId}`;
            return NextResponse.json({ url, storage: 's3', id: imageId, mimeType: mime, description: descriptionMarkdown });
        }

        // Otherwise store in temp cache and return temp URL
        const ttlSec = 10 * 60; // 10 minutes
        const id = tempBinaryCache.put(buf, mime, ttlSec * 1000);
        const url = `/api/tmp-images/${id}`;
        return NextResponse.json({ url, storage: 'temp', id, mimeType: mime, expiresInSec: ttlSec, description: descriptionMarkdown });
    } catch (e) {
        console.error('upload image error', e);
        return NextResponse.json({ error: 'Upload failed' }, { status: 500 });
    }
}
|
||||
|
|
@ -152,11 +152,24 @@ export function Chat({
|
|||
}
|
||||
}, []);
|
||||
|
||||
function handleUserMessage(prompt: string) {
|
||||
const updatedMessages: z.infer<typeof Message>[] = [...messages, {
|
||||
role: 'user',
|
||||
content: prompt,
|
||||
}];
|
||||
function handleUserMessage(prompt: string, imageDebug?: { url: string; description?: string | null }) {
|
||||
// Insert an internal-only debug message with image URL/markdown (if provided),
|
||||
// then the actual user message last so streaming triggers correctly.
|
||||
const debugMessages: z.infer<typeof Message>[] = imageDebug ? [{
|
||||
role: 'assistant',
|
||||
content: `Image Description\n\nURL: ${imageDebug.url}\n\n${imageDebug.description ? imageDebug.description : ''}`.trim(),
|
||||
agentName: 'Image Description',
|
||||
responseType: 'internal',
|
||||
} as any] : [];
|
||||
|
||||
const updatedMessages: z.infer<typeof Message>[] = [
|
||||
...messages,
|
||||
...debugMessages,
|
||||
{
|
||||
role: 'user',
|
||||
content: prompt,
|
||||
} as any,
|
||||
];
|
||||
setMessages(updatedMessages);
|
||||
setError(null);
|
||||
setIsLastInteracted(true);
|
||||
|
|
@ -229,9 +242,46 @@ export function Chat({
|
|||
}
|
||||
|
||||
// set up a cached turn
|
||||
// Merge-at-send: if the immediately preceding message is our internal
|
||||
// Image Description debug message, append its details (URL/markdown)
|
||||
// to the outgoing user message content, without changing the UI.
|
||||
const last = messages[messages.length - 1];
|
||||
let mergedContent = (typeof last?.content === 'string' ? last.content : '') || '';
|
||||
if (messages.length >= 2) {
|
||||
const prev = messages[messages.length - 2] as any;
|
||||
const isImageDebug = prev && prev.role === 'assistant' && prev.responseType === 'internal' && prev.agentName === 'Image Description' && typeof prev.content === 'string';
|
||||
if (isImageDebug) {
|
||||
// Expect prev.content to have: "Image Description\n\nURL: <url>\n\n<markdown>"
|
||||
// Extract URL and markdown blocks for a clean append
|
||||
const content = prev.content as string;
|
||||
let url: string | undefined;
|
||||
let markdown: string | undefined;
|
||||
const urlMatch = content.match(/URL:\s*(\S+)/i);
|
||||
if (urlMatch) url = urlMatch[1];
|
||||
// markdown is whatever comes after the blank line following URL
|
||||
const parts = content.split(/\n\n/);
|
||||
if (parts.length >= 3) {
|
||||
markdown = parts.slice(2).join('\n\n').trim();
|
||||
}
|
||||
const appendSections: string[] = [];
|
||||
if (url) appendSections.push(`The user uploaded an image. URL: ${url}`);
|
||||
if (markdown) appendSections.push(`Image description (markdown):\n\n${markdown}`);
|
||||
if (appendSections.length > 0) {
|
||||
mergedContent = [mergedContent, appendSections.join('\n\n')]
|
||||
.filter(Boolean)
|
||||
.join('\n\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const messagesToSend: z.infer<typeof Message>[] = [{
|
||||
role: 'user',
|
||||
content: mergedContent,
|
||||
} as any];
|
||||
|
||||
const response = await createCachedTurn({
|
||||
conversationId: conversationId.current,
|
||||
messages: messages.slice(-1), // only send the last message
|
||||
messages: messagesToSend, // send merged content only
|
||||
});
|
||||
if (ignore) {
|
||||
return;
|
||||
|
|
@ -500,4 +550,4 @@ export function Chat({
|
|||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import { Textarea } from '@/components/ui/textarea';
|
|||
import { Button, Spinner } from "@heroui/react";
|
||||
|
||||
interface ComposeBoxPlaygroundProps {
|
||||
handleUserMessage: (message: string) => void;
|
||||
handleUserMessage: (message: string, imageDebug?: { url: string; description?: string | null }) => void;
|
||||
messages: any[];
|
||||
loading: boolean;
|
||||
disabled?: boolean;
|
||||
|
|
@ -22,9 +22,12 @@ export function ComposeBoxPlayground({
|
|||
onCancel,
|
||||
}: ComposeBoxPlaygroundProps) {
|
||||
const [input, setInput] = useState('');
|
||||
const [uploading, setUploading] = useState(false);
|
||||
const [pendingImage, setPendingImage] = useState<{ url?: string; previewSrc?: string; mimeType?: string; description?: string | null } | null>(null);
|
||||
const [isFocused, setIsFocused] = useState(false);
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||
const previousMessagesLength = useRef(messages.length);
|
||||
const uploadAbortRef = useRef<AbortController | null>(null);
|
||||
|
||||
// Handle auto-focus when new messages arrive
|
||||
useEffect(() => {
|
||||
|
|
@ -35,12 +38,27 @@ export function ComposeBoxPlayground({
|
|||
}, [messages.length, shouldAutoFocus]);
|
||||
|
||||
function handleInput() {
|
||||
const prompt = input.trim();
|
||||
if (!prompt) {
|
||||
// Mirror send-button disable rules to block Enter submits
|
||||
if (disabled || loading || uploading) return;
|
||||
if (pendingImage?.url && pendingImage.description === undefined) return;
|
||||
const text = input.trim();
|
||||
if (!text && !pendingImage) {
|
||||
return;
|
||||
}
|
||||
// Only include the user's typed text; omit image URL/markdown from user message
|
||||
const parts: string[] = [];
|
||||
if (text) parts.push(text);
|
||||
const prompt = parts.join('\n\n');
|
||||
// Build optional debug payload to render as internal-only message in debug view
|
||||
const imageDebug = pendingImage?.url
|
||||
? { url: pendingImage.url, description: pendingImage.description ?? null }
|
||||
: undefined;
|
||||
setInput('');
|
||||
handleUserMessage(prompt);
|
||||
if (pendingImage?.previewSrc) {
|
||||
try { URL.revokeObjectURL(pendingImage.previewSrc); } catch {}
|
||||
}
|
||||
setPendingImage(null);
|
||||
handleUserMessage(prompt, imageDebug);
|
||||
}
|
||||
|
||||
const handleInputKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
|
||||
|
|
@ -55,6 +73,57 @@ export function ComposeBoxPlayground({
|
|||
onFocus?.();
|
||||
};
|
||||
|
||||
/**
 * Uploads a picked image to /api/uploaded-images and tracks it as the pending
 * attachment.
 *
 * A local object-URL preview is shown immediately; once the server responds,
 * the pending image is completed with the returned url, MIME type and
 * description. Any upload already in flight is aborted first. An aborted
 * upload is logged only; any other failure removes the preview and alerts the
 * user.
 *
 * @param file The file chosen in the file input.
 */
async function handleImagePicked(file: File) {
    if (!file) return;

    // Release the preview of an image that is being replaced so its object
    // URL does not leak.
    if (pendingImage?.previewSrc) {
        try { URL.revokeObjectURL(pendingImage.previewSrc); } catch {}
    }

    // Cancel any in-flight request, then register this upload as the active one.
    if (uploadAbortRef.current) {
        try { uploadAbortRef.current.abort(); } catch {}
    }
    const controller = new AbortController();
    uploadAbortRef.current = controller;

    let previewSrc: string | undefined;
    try {
        // Show immediate local preview
        previewSrc = URL.createObjectURL(file);
        setPendingImage({ previewSrc });
        setUploading(true);

        const form = new FormData();
        form.append('file', file);
        const res = await fetch('/api/uploaded-images', {
            method: 'POST',
            body: form,
            signal: controller.signal,
        });
        if (!res.ok) {
            throw new Error(`Upload failed: ${res.status}`);
        }
        const data = await res.json();
        const url: string | undefined = data?.url;
        if (!url) throw new Error('No URL returned');

        // Only apply state if request wasn't aborted/dismissed
        if (uploadAbortRef.current === controller) {
            setPendingImage({ url, previewSrc, mimeType: data?.mimeType, description: data?.description ?? null });
        }
    } catch (e: unknown) {
        const errorName = (e as { name?: string } | null)?.name;
        if (errorName === 'AbortError') {
            // Swallow aborts
            console.log('Image upload/description aborted');
        } else {
            console.error('Image upload failed', e);
            // Drop the preview of the failed upload: a pending image without
            // a url would otherwise stay attached and look sendable. Skip
            // this if the image was dismissed or replaced in the meantime.
            if (uploadAbortRef.current === controller) {
                if (previewSrc) {
                    try { URL.revokeObjectURL(previewSrc); } catch {}
                }
                setPendingImage(null);
            }
            alert('Image upload failed. Please try again.');
        }
    } finally {
        // Release the ref only if this upload still owns it; a newer upload
        // must keep its controller and its spinner.
        if (uploadAbortRef.current === controller) {
            uploadAbortRef.current = null;
        }
        if (uploadAbortRef.current === null) {
            setUploading(false);
        }
    }
}
|
||||
|
||||
return (
|
||||
<div className="relative group">
|
||||
{/* Keyboard shortcut hint */}
|
||||
|
|
@ -68,6 +137,33 @@ export function ComposeBoxPlayground({
|
|||
bg-white dark:bg-[#1e2023] flex items-end gap-2">
|
||||
{/* Textarea */}
|
||||
<div className="flex-1">
|
||||
{pendingImage && (
|
||||
<div className="mb-2 inline-block relative">
|
||||
<img
|
||||
src={pendingImage.previewSrc || pendingImage.url}
|
||||
alt="Uploaded image preview"
|
||||
className="w-16 h-16 object-cover rounded border border-gray-200 dark:border-gray-700"
|
||||
/>
|
||||
<button
|
||||
type="button"
|
||||
aria-label="Remove image"
|
||||
className="absolute -top-1 -right-1 p-1 rounded-full bg-white dark:bg-zinc-900 border border-gray-200 dark:border-gray-700 shadow hover:bg-gray-50 dark:hover:bg-zinc-800"
|
||||
onClick={() => {
|
||||
if (pendingImage?.previewSrc) {
|
||||
try { URL.revokeObjectURL(pendingImage.previewSrc); } catch {}
|
||||
}
|
||||
if (uploadAbortRef.current) {
|
||||
try { uploadAbortRef.current.abort(); } catch {}
|
||||
uploadAbortRef.current = null;
|
||||
}
|
||||
setUploading(false);
|
||||
setPendingImage(null);
|
||||
}}
|
||||
>
|
||||
<XIcon size={12} />
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
<Textarea
|
||||
ref={textareaRef}
|
||||
value={input}
|
||||
|
|
@ -95,11 +191,37 @@ export function ComposeBoxPlayground({
|
|||
/>
|
||||
</div>
|
||||
|
||||
{/* Image upload button (moved to the right) */}
|
||||
<label className={`
|
||||
flex items-center justify-center w-9 h-9 rounded-lg cursor-pointer
|
||||
${uploading ? 'bg-gray-100 dark:bg-gray-800 text-gray-400' : 'bg-gray-100 hover:bg-gray-200 dark:bg-gray-800 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-300'}
|
||||
transition-colors
|
||||
`}>
|
||||
<input
|
||||
type="file"
|
||||
accept="image/*"
|
||||
className="hidden"
|
||||
disabled={disabled || loading || uploading}
|
||||
onChange={(e) => {
|
||||
const f = e.target.files?.[0];
|
||||
if (f) handleImagePicked(f);
|
||||
e.currentTarget.value = '';
|
||||
}}
|
||||
/>
|
||||
{uploading ? <Spinner size="sm" /> : <ImageIcon size={16} />}
|
||||
</label>
|
||||
{/* Send/Stop button */}
|
||||
<Button
|
||||
size="sm"
|
||||
isIconOnly
|
||||
disabled={disabled || (loading ? false : !input.trim())}
|
||||
disabled={
|
||||
disabled
|
||||
|| uploading
|
||||
// If an image is selected but description isn't ready yet, keep disabled
|
||||
|| (pendingImage?.url && pendingImage.description === undefined)
|
||||
// When not loading a response, require either text or a ready image
|
||||
|| (loading ? false : (!input.trim() && !pendingImage))
|
||||
}
|
||||
onPress={loading ? onCancel : handleInput}
|
||||
className={`
|
||||
transition-all duration-200
|
||||
|
|
@ -163,4 +285,43 @@ function StopIcon({ size, className }: { size: number, className?: string }) {
|
|||
<rect x="6" y="6" width="12" height="12" rx="1" />
|
||||
</svg>
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Outline "picture" glyph (frame, sun dot and hill line) drawn with the
 * current text colour on a 24x24 view box.
 *
 * @param size      Rendered width and height.
 * @param className Optional class applied to the <svg> element.
 */
function ImageIcon(props: { size: number, className?: string }) {
    const { size, className } = props;
    const glyph = (
        <svg
            className={className}
            viewBox="0 0 24 24"
            width={size}
            height={size}
            fill="none"
            stroke="currentColor"
            strokeWidth="2"
            strokeLinecap="round"
            strokeLinejoin="round"
        >
            {/* frame */}
            <rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
            {/* sun */}
            <circle cx="8.5" cy="8.5" r="1.5" />
            {/* hill line */}
            <path d="M21 15l-5-5L5 21" />
        </svg>
    );
    return glyph;
}
|
||||
|
||||
/**
 * Outline "X" (dismiss) glyph made of two crossing diagonals, drawn with the
 * current text colour on a 24x24 view box.
 *
 * @param size      Rendered width and height.
 * @param className Optional class applied to the <svg> element.
 */
function XIcon(props: { size: number, className?: string }) {
    const { size, className } = props;
    const glyph = (
        <svg
            className={className}
            viewBox="0 0 24 24"
            width={size}
            height={size}
            fill="none"
            stroke="currentColor"
            strokeWidth="2"
            strokeLinecap="round"
            strokeLinejoin="round"
        >
            {/* top-right to bottom-left */}
            <line x1="18" y1="6" x2="6" y2="18" />
            {/* top-left to bottom-right */}
            <line x1="6" y1="6" x2="18" y2="18" />
        </svg>
    );
    return glyph;
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import { SignJWT } from "jose";
|
|||
import crypto from "crypto";
|
||||
import { GoogleGenerativeAI } from "@google/generative-ai";
|
||||
import { tempBinaryCache } from "@/src/application/services/temp-binary-cache";
|
||||
import { S3Client, PutObjectCommand } from "@aws-sdk/client-s3";
|
||||
import { S3Client, PutObjectCommand, GetObjectCommand, HeadObjectCommand } from "@aws-sdk/client-s3";
|
||||
|
||||
// Internal dependencies
|
||||
import { embeddingModel } from "@/app/lib/embedding";
|
||||
|
|
@ -44,6 +44,7 @@ export async function invokeGenerateImageTool(
|
|||
prompt: string,
|
||||
options?: {
|
||||
modelName?: string;
|
||||
inputImageUrl?: string;
|
||||
}
|
||||
): Promise<{
|
||||
texts: string[];
|
||||
|
|
@ -62,7 +63,140 @@ export async function invokeGenerateImageTool(
|
|||
const model = client.getGenerativeModel({ model: modelName });
|
||||
|
||||
log.log(`Generating image with model: ${modelName}`);
|
||||
const result = await model.generateContent(prompt);
|
||||
|
||||
let result: any;
|
||||
const inputImageUrl = options?.inputImageUrl;
|
||||
if (inputImageUrl) {
|
||||
try {
|
||||
// Resolve the image into inlineData for Gemini
|
||||
let imageBuf: Buffer | null = null;
|
||||
let imageMime: string = 'image/png';
|
||||
|
||||
if (inputImageUrl.startsWith('/api/tmp-images/')) {
|
||||
const id = inputImageUrl.split('/api/tmp-images/')[1];
|
||||
const entry = tempBinaryCache.get(id);
|
||||
if (entry) {
|
||||
imageBuf = entry.buf;
|
||||
imageMime = entry.mimeType || imageMime;
|
||||
}
|
||||
} else if (inputImageUrl.startsWith('/api/uploaded-images/')) {
|
||||
const bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
|
||||
if (bucket) {
|
||||
const region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1';
|
||||
const s3 = new S3Client({
|
||||
region,
|
||||
credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? {
|
||||
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
|
||||
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
|
||||
} as any : undefined,
|
||||
});
|
||||
const id = inputImageUrl.split('/api/uploaded-images/')[1];
|
||||
const last2 = id.slice(-2).padStart(2, '0');
|
||||
const dirA = last2.charAt(0);
|
||||
const dirB = last2.charAt(1);
|
||||
const baseKey = `uploaded_images/${dirA}/${dirB}/${id}`;
|
||||
const exts = ['.png', '.jpg', '.webp', '.bin'];
|
||||
let foundExt: string | null = null;
|
||||
for (const ext of exts) {
|
||||
try {
|
||||
await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: `${baseKey}${ext}` }));
|
||||
foundExt = ext; break;
|
||||
} catch {}
|
||||
}
|
||||
if (foundExt) {
|
||||
const key = `${baseKey}${foundExt}`;
|
||||
const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
|
||||
const chunks: Buffer[] = [];
|
||||
const body = resp.Body as any;
|
||||
const nodeStream = typeof body?.pipe === 'function' ? body : undefined;
|
||||
if (nodeStream) {
|
||||
imageMime = resp.ContentType || imageMime;
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
nodeStream.on('data', (c: Buffer) => chunks.push(Buffer.isBuffer(c) ? c : Buffer.from(c)));
|
||||
nodeStream.on('end', () => resolve());
|
||||
nodeStream.on('error', reject);
|
||||
});
|
||||
imageBuf = Buffer.concat(chunks);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (inputImageUrl.startsWith('/api/generated-images/')) {
|
||||
const bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
|
||||
if (bucket) {
|
||||
const region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1';
|
||||
const s3 = new S3Client({
|
||||
region,
|
||||
credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? {
|
||||
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
|
||||
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
|
||||
} as any : undefined,
|
||||
});
|
||||
const id = inputImageUrl.split('/api/generated-images/')[1];
|
||||
const last2 = id.slice(-2).padStart(2, '0');
|
||||
const dirA = last2.charAt(0);
|
||||
const dirB = last2.charAt(1);
|
||||
const baseKey = `generated_images/${dirA}/${dirB}/${id}`;
|
||||
const exts = ['.png', '.jpg', '.webp'];
|
||||
let foundExt: string | null = null;
|
||||
for (const ext of exts) {
|
||||
try {
|
||||
await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: `${baseKey}${ext}` }));
|
||||
foundExt = ext; break;
|
||||
} catch {}
|
||||
}
|
||||
if (foundExt) {
|
||||
const key = `${baseKey}${foundExt}`;
|
||||
const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
|
||||
const chunks: Buffer[] = [];
|
||||
const body = resp.Body as any;
|
||||
const nodeStream = typeof body?.pipe === 'function' ? body : undefined;
|
||||
if (nodeStream) {
|
||||
imageMime = resp.ContentType || imageMime;
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
nodeStream.on('data', (c: Buffer) => chunks.push(Buffer.isBuffer(c) ? c : Buffer.from(c)));
|
||||
nodeStream.on('end', () => resolve());
|
||||
nodeStream.on('error', reject);
|
||||
});
|
||||
imageBuf = Buffer.concat(chunks);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (inputImageUrl.startsWith('data:')) {
|
||||
// data URL
|
||||
const m = inputImageUrl.match(/^data:([^;]+);base64,(.*)$/);
|
||||
if (m) {
|
||||
imageMime = m[1];
|
||||
imageBuf = Buffer.from(m[2], 'base64');
|
||||
}
|
||||
} else if (/^https?:\/\//.test(inputImageUrl)) {
|
||||
// Best-effort network fetch (may fail if egress restricted)
|
||||
try {
|
||||
const resp = await fetch(inputImageUrl);
|
||||
const ab = await resp.arrayBuffer();
|
||||
imageBuf = Buffer.from(ab);
|
||||
imageMime = resp.headers.get('content-type') || imageMime;
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
|
||||
if (imageBuf) {
|
||||
const parts: any[] = [
|
||||
{ inlineData: { data: imageBuf.toString('base64'), mimeType: imageMime } },
|
||||
prompt,
|
||||
];
|
||||
result = await model.generateContent(parts as any);
|
||||
} else {
|
||||
// Fallback to text-only
|
||||
result = await model.generateContent(prompt);
|
||||
}
|
||||
} catch (e) {
|
||||
log.log('Falling back to text-only generation due to input image error');
|
||||
result = await model.generateContent(prompt);
|
||||
}
|
||||
} else {
|
||||
result = await model.generateContent(prompt);
|
||||
}
|
||||
const response = result.response as any;
|
||||
|
||||
// Track usage if available
|
||||
|
|
@ -627,7 +761,10 @@ export function createGenerateImageTool(
|
|||
strict: false,
|
||||
parameters: {
|
||||
type: 'object',
|
||||
properties: parameters.properties,
|
||||
properties: {
|
||||
...parameters.properties,
|
||||
input_image_url: { type: 'string', description: 'Optional URL of an input image to condition generation.' },
|
||||
},
|
||||
required: parameters.required || [],
|
||||
additionalProperties: true,
|
||||
},
|
||||
|
|
@ -638,11 +775,12 @@ export function createGenerateImageTool(
|
|||
return JSON.stringify({ error: "Missing required field: prompt" });
|
||||
}
|
||||
const modelName: string | undefined = input?.modelName;
|
||||
const inputImageUrl: string | undefined = input?.input_image_url;
|
||||
const result = await invokeGenerateImageTool(
|
||||
logger,
|
||||
usageTracker,
|
||||
prompt,
|
||||
{ modelName }
|
||||
{ modelName, inputImageUrl }
|
||||
);
|
||||
// If S3 bucket configured, store in S3 under generated_images/<c>/<d>/<filename>
|
||||
const s3Bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue