mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-03 19:25:19 +02:00
Image (#238)
* added image tool * show image in playground * store images in Redis * new images use unique urls * moved from redis to s3 for image urls * removed unnecessary changes * removed the bubble around assistant messages * added a download button on hover on image * increased image size and removed border * revert the bubbles for the assistant messages
This commit is contained in:
parent
af0fcce127
commit
158777b045
7 changed files with 489 additions and 22 deletions
45
apps/rowboat/app/api/generated-images/[...path]/route.ts
Normal file
45
apps/rowboat/app/api/generated-images/[...path]/route.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
|
||||
|
||||
/**
 * Reduces a URL path segment to something that is safe inside the quoted
 * `filename="..."` parameter of a `Content-Disposition` header.
 *
 * Anything outside `[A-Za-z0-9._-]` (quotes, CR/LF, separators, ...) is
 * replaced with `_` so a crafted request path cannot break out of the quoted
 * string. Falls back to `image` when nothing usable is left.
 */
function toSafeDispositionFilename(name: string): string {
    const cleaned = name.replace(/[^A-Za-z0-9._-]/g, '_');
    return cleaned.length > 0 ? cleaned : 'image';
}

/**
 * Tells whether an error thrown by the S3 client means "the object is not
 * there" as opposed to an infrastructure failure.
 *
 * HTTP 403 is included on purpose: S3 answers 403 instead of 404 for a missing
 * key when the caller lacks `s3:ListBucket`.
 */
function isMissingObjectError(err: unknown): boolean {
    if (typeof err !== 'object' || err === null) {
        return false;
    }
    const { name, $metadata } = err as {
        name?: unknown;
        $metadata?: { httpStatusCode?: unknown };
    };
    const status = $metadata?.httpStatusCode;
    return name === 'NoSuchKey' || name === 'NotFound' || status === 404 || status === 403;
}

/**
 * Streams a generated image out of S3.
 *
 * The catch-all route segments are joined and looked up under the
 * `generated_images/` prefix of the bucket named by `UPLOADS_S3_BUCKET`.
 *
 * Responses:
 * - 200 with the object body, the stored content type and a long-lived
 *   immutable cache header;
 * - 400 when fewer than three path segments are supplied;
 * - 404 when the object does not exist (or has no body);
 * - 500 when the bucket is not configured or S3 fails for another reason.
 */
export async function GET(request: NextRequest, props: { params: Promise<{ path: string[] }> }) {
    const params = await props.params;
    const path = params.path || [];
    if (path.length < 3) {
        return NextResponse.json({ error: 'Invalid path' }, { status: 400 });
    }

    const bucket = process.env.UPLOADS_S3_BUCKET || '';
    if (!bucket) {
        return NextResponse.json({ error: 'S3 bucket not configured' }, { status: 500 });
    }

    const region = process.env.UPLOADS_AWS_REGION || 'us-east-1';
    const accessKeyId = process.env.AWS_ACCESS_KEY_ID;
    const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY;
    const s3 = new S3Client({
        region,
        // Explicit keys only when both are present; otherwise the SDK's
        // default credential resolution is used.
        credentials: accessKeyId && secretAccessKey ? { accessKeyId, secretAccessKey } : undefined,
    });

    // The last segment comes straight from the request URL, so it is
    // sanitised before being echoed into a response header.
    const filename = toSafeDispositionFilename(path[path.length - 1] ?? '');
    const key = `generated_images/${path.join('/')}`;
    try {
        const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
        const body = resp.Body as any;
        if (!body) {
            // Never hand out an empty 200 that clients would cache for a year.
            return NextResponse.json({ error: 'Not found' }, { status: 404 });
        }
        const contentType = resp.ContentType || 'application/octet-stream';
        const webStream = body.transformToWebStream ? body.transformToWebStream() : body;
        return new NextResponse(webStream, {
            status: 200,
            headers: {
                'Content-Type': contentType,
                'Cache-Control': 'public, max-age=31536000, immutable',
                'Content-Disposition': `inline; filename="${filename}"`,
            },
        });
    } catch (e) {
        console.error('S3 get error', e);
        if (isMissingObjectError(e)) {
            return NextResponse.json({ error: 'Not found' }, { status: 404 });
        }
        // Network / configuration problems are server errors, not "not found".
        return NextResponse.json({ error: 'Failed to load image' }, { status: 500 });
    }
}
|
||||
|
||||
25
apps/rowboat/app/api/tmp-images/[id]/route.ts
Normal file
25
apps/rowboat/app/api/tmp-images/[id]/route.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { tempBinaryCache } from '@/src/application/services/temp-binary-cache';
|
||||
|
||||
/**
 * Serves a binary payload from the in-memory temporary cache.
 *
 * Responds 400 when the route has no `id`, 404 when the cache has no live
 * entry for it, and otherwise 200 with the cached bytes. The response is
 * marked `no-store` and uses the cached MIME type when one is set.
 */
export async function GET(request: NextRequest, props: { params: Promise<{ id: string }> }) {
    const { id } = await props.params;
    if (!id) {
        return NextResponse.json({ error: 'Missing id' }, { status: 400 });
    }

    // Look the payload up in the in-memory temp cache
    const cached = tempBinaryCache.get(id);
    if (!cached) {
        return NextResponse.json({ error: 'Not found or expired' }, { status: 404 });
    }

    const headers = {
        'Content-Type': cached.mimeType || 'application/octet-stream',
        'Cache-Control': 'no-store',
        'Content-Disposition': `inline; filename="${id}"`,
    };
    return new NextResponse(cached.buf, { status: 200, headers });
}
|
||||
|
|
@ -34,7 +34,22 @@ function loadTemplatesFromFiles(): { [key: string]: z.infer<typeof WorkflowTempl
|
|||
},
|
||||
],
|
||||
prompts: [],
|
||||
tools: [],
|
||||
tools: [
|
||||
{
|
||||
name: "Generate Image",
|
||||
description: "Generate an image using Google Gemini given a text prompt. Returns base64-encoded image data and any text parts.",
|
||||
isGeminiImage: true,
|
||||
parameters: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
prompt: { type: 'string', description: 'Text prompt describing the image to generate' },
|
||||
modelName: { type: 'string', description: 'Optional Gemini model override' },
|
||||
},
|
||||
required: ['prompt'],
|
||||
additionalProperties: true,
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
try {
|
||||
|
|
@ -73,4 +88,4 @@ function loadTemplatesFromFiles(): { [key: string]: z.infer<typeof WorkflowTempl
|
|||
export const templates: { [key: string]: z.infer<typeof WorkflowTemplate> } = loadTemplatesFromFiles();
|
||||
|
||||
// Note: Prebuilt cards are now loaded from app/lib/prebuilt-cards/ directory
|
||||
// starting_copilot_prompts has been removed as it was unused
|
||||
// starting_copilot_prompts has been removed as it was unused
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ export const WorkflowTool = z.object({
|
|||
isComposio: z.boolean().optional(), // whether this is a Composio tool
|
||||
isLibrary: z.boolean().default(false).optional(), // whether this is a library tool
|
||||
isWebhook: z.boolean().optional(), // whether this is a webhook tool
|
||||
isGeminiImage: z.boolean().optional(), // whether this tool generates images via Gemini
|
||||
composioData: z.object({
|
||||
slug: z.string(), // the slug for the Composio tool e.g. "GITHUB_CREATE_AN_ISSUE"
|
||||
noAuth: z.boolean(), // whether the tool requires no authentication
|
||||
|
|
@ -190,4 +191,4 @@ export function sanitizeTextWithMentions(
|
|||
sanitized: text,
|
||||
entities,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import z from "zod";
|
|||
import { Workflow } from "@/app/lib/types/workflow_types";
|
||||
import { WorkflowTool } from "@/app/lib/types/workflow_types";
|
||||
import MarkdownContent from "@/app/lib/components/markdown-content";
|
||||
import { ChevronRightIcon, ChevronDownIcon, ChevronUpIcon, CodeIcon, CheckCircleIcon, FileTextIcon, EyeIcon, EyeOffIcon, WrapTextIcon, ArrowRightFromLineIcon, BracesIcon, TextIcon, FlagIcon, HelpCircleIcon, MoreHorizontal } from "lucide-react";
|
||||
import { ChevronRightIcon, ChevronDownIcon, ChevronUpIcon, CodeIcon, CheckCircleIcon, FileTextIcon, EyeIcon, EyeOffIcon, WrapTextIcon, ArrowRightFromLineIcon, BracesIcon, TextIcon, FlagIcon, HelpCircleIcon, MoreHorizontal, Download as DownloadIcon } from "lucide-react";
|
||||
import { Dropdown, DropdownMenu, DropdownTrigger, DropdownItem } from "@heroui/react";
|
||||
import { ProfileContextBox } from "./profile-context-box";
|
||||
import { Message, ToolMessage, AssistantMessageWithToolCalls } from "@/app/lib/types/types";
|
||||
|
|
@ -101,7 +101,10 @@ function InternalAssistantMessage({ content, sender, latency, delta, showJsonMod
|
|||
/>
|
||||
)}
|
||||
</div>
|
||||
<div className="bg-gray-50 dark:bg-zinc-800 px-4 py-2.5 rounded-2xl rounded-bl-lg text-sm leading-relaxed text-gray-700 dark:text-gray-200 border-none shadow-sm animate-slideUpAndFade flex flex-col items-stretch">
|
||||
<div className="bg-purple-50 dark:bg-purple-900/30 px-4 py-2.5
|
||||
rounded-2xl rounded-bl-lg text-sm leading-relaxed
|
||||
text-gray-800 dark:text-purple-100
|
||||
border-none shadow-sm animate-slideUpAndFade">
|
||||
<div className="text-left mb-2">
|
||||
{isJsonContent && jsonMode && (
|
||||
<div className="mb-2 flex gap-4">
|
||||
|
|
@ -146,7 +149,8 @@ function AssistantMessage({
|
|||
onExplain,
|
||||
showDebugMessages,
|
||||
isFirstAssistant,
|
||||
index
|
||||
index,
|
||||
imagePreviews,
|
||||
}: {
|
||||
content: string,
|
||||
sender: string | null | undefined,
|
||||
|
|
@ -155,7 +159,8 @@ function AssistantMessage({
|
|||
onExplain?: (type: 'assistant', message: string, index: number) => void,
|
||||
showDebugMessages?: boolean,
|
||||
isFirstAssistant?: boolean,
|
||||
index: number
|
||||
index: number,
|
||||
imagePreviews?: { mimeType: string; url?: string; dataBase64?: string; truncated?: boolean }[],
|
||||
}) {
|
||||
return (
|
||||
<div className="self-start flex flex-col gap-1 my-5">
|
||||
|
|
@ -174,14 +179,42 @@ function AssistantMessage({
|
|||
/>
|
||||
)}
|
||||
</div>
|
||||
<div className="bg-purple-50 dark:bg-purple-900/30 px-4 py-2.5
|
||||
rounded-2xl rounded-bl-lg text-sm leading-relaxed
|
||||
text-gray-800 dark:text-purple-100
|
||||
border-none shadow-sm animate-slideUpAndFade">
|
||||
<div className="flex flex-col gap-1">
|
||||
<div className="text-sm leading-relaxed text-gray-800 dark:text-gray-100 animate-slideUpAndFade">
|
||||
<div className="flex flex-col gap-2">
|
||||
<div className="text-left">
|
||||
<MarkdownContent content={content} />
|
||||
</div>
|
||||
{Array.isArray(imagePreviews) && imagePreviews.length > 0 && (
|
||||
<div className="flex flex-wrap gap-3">
|
||||
{imagePreviews.map((img, i) => {
|
||||
const src = img.url ? img.url : `data:${img.mimeType};base64,${img.dataBase64}`;
|
||||
const ext = img.mimeType === 'image/jpeg' ? 'jpg' : (img.mimeType === 'image/webp' ? 'webp' : 'png');
|
||||
const filename = `generated_image_${i + 1}.${ext}`;
|
||||
return (
|
||||
<div key={i} className="group relative rounded-lg p-2 bg-white dark:bg-zinc-900">
|
||||
<a
|
||||
href={src}
|
||||
download={filename}
|
||||
className="absolute bottom-3 right-3 opacity-0 group-hover:opacity-100 transition-opacity bg-white/80 dark:bg-zinc-900/80 rounded-md p-1 shadow hover:bg-white dark:hover:bg-zinc-800"
|
||||
aria-label="Download image"
|
||||
>
|
||||
<DownloadIcon size={16} className="text-gray-700 dark:text-gray-200" />
|
||||
</a>
|
||||
<img
|
||||
src={src}
|
||||
alt={`Image ${i+1}`}
|
||||
className="max-h-80 max-w-full object-contain rounded"
|
||||
/>
|
||||
{img.truncated && (
|
||||
<div className="text-[11px] text-amber-600 dark:text-amber-400 mt-1">
|
||||
Preview truncated to meet size limits.
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
{latency > 0 && <div className="text-right text-xs text-gray-400 dark:text-gray-500 mt-1">
|
||||
{Math.round(latency / 1000)}s
|
||||
</div>}
|
||||
|
|
@ -196,10 +229,11 @@ function AssistantMessageLoading() {
|
|||
return (
|
||||
<div className="self-start flex flex-col gap-1 my-5">
|
||||
<div className="max-w-[85%] inline-block">
|
||||
<div className="bg-purple-50 dark:bg-purple-900/30 px-4 py-2.5
|
||||
rounded-2xl rounded-bl-lg
|
||||
border-none shadow-sm animate-slideUpAndFade min-h-[2.5rem] flex items-center">
|
||||
<Spinner size="sm" className="ml-2" />
|
||||
<div className="bg-gray-50 dark:bg-gray-800 px-4 py-2.5
|
||||
rounded-lg border border-gray-200 dark:border-gray-700
|
||||
shadow-sm animate-slideUpAndFade min-h-[2.5rem] flex items-center gap-2">
|
||||
<Spinner size="sm" />
|
||||
<span className="text-sm text-gray-600 dark:text-gray-400">Generating...</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -241,6 +275,7 @@ function ToolCalls({
|
|||
result={results[toolCall.id]}
|
||||
sender={sender}
|
||||
workflow={workflow}
|
||||
messages={messages}
|
||||
delta={delta}
|
||||
onFix={onFix}
|
||||
onExplain={onExplain}
|
||||
|
|
@ -258,6 +293,7 @@ function ToolCall({
|
|||
result,
|
||||
sender,
|
||||
workflow,
|
||||
messages,
|
||||
delta,
|
||||
onFix,
|
||||
onExplain,
|
||||
|
|
@ -270,6 +306,7 @@ function ToolCall({
|
|||
result: z.infer<typeof ToolMessage> | undefined;
|
||||
sender: string | null | undefined;
|
||||
workflow: z.infer<typeof Workflow>;
|
||||
messages: z.infer<typeof Message>[];
|
||||
delta: number;
|
||||
onFix?: (message: string, index: number) => void;
|
||||
onExplain?: (type: 'tool' | 'transition', message: string, index: number) => void;
|
||||
|
|
@ -297,9 +334,17 @@ function ToolCall({
|
|||
toolCallIndex={toolCallIndex}
|
||||
/>;
|
||||
}
|
||||
// Prefer the ToolMessage that actually follows this tool call in the stream
|
||||
let nearestResult: z.infer<typeof ToolMessage> | undefined = result;
|
||||
for (let i = parentIndex; i < messages.length; i++) {
|
||||
const m = messages[i] as any;
|
||||
if (i > parentIndex && m.role === 'assistant') break; // stop at next assistant
|
||||
if (m.role === 'tool' && m.toolCallId === toolCall.id) { nearestResult = m as any; break; }
|
||||
}
|
||||
|
||||
return <ClientToolCall
|
||||
toolCall={toolCall}
|
||||
result={result}
|
||||
result={nearestResult}
|
||||
sender={sender ?? ''}
|
||||
workflow={workflow}
|
||||
delta={delta}
|
||||
|
|
@ -388,6 +433,26 @@ function ClientToolCall({
|
|||
const hasExpandedContent = paramsExpanded || resultsExpanded;
|
||||
const isCompressed = !paramsExpanded && !resultsExpanded;
|
||||
|
||||
// Try to parse tool result as JSON and extract images
|
||||
let parsedResult: any = undefined;
|
||||
let imagePreviews: { mimeType: string; dataBase64?: string; url?: string; truncated?: boolean }[] = [];
|
||||
if (availableResult && typeof availableResult.content === 'string') {
|
||||
try {
|
||||
parsedResult = JSON.parse(availableResult.content);
|
||||
const imgs = Array.isArray(parsedResult?.images) ? parsedResult.images : [];
|
||||
imagePreviews = imgs
|
||||
.filter((img: any) => (typeof img?.dataBase64 === 'string' && img.dataBase64.length > 0) || typeof img?.url === 'string')
|
||||
.map((img: any) => ({
|
||||
mimeType: img?.mimeType || 'image/png',
|
||||
dataBase64: typeof img?.dataBase64 === 'string' ? img.dataBase64 : undefined,
|
||||
url: typeof img?.url === 'string' ? img.url : undefined,
|
||||
truncated: Boolean(img?.truncated),
|
||||
}));
|
||||
} catch (_) {
|
||||
// ignore parse errors; treat as non-JSON result
|
||||
}
|
||||
}
|
||||
|
||||
// Compressed state: stretch header, no wrapping
|
||||
if (isCompressed) {
|
||||
return (
|
||||
|
|
@ -444,7 +509,38 @@ function ClientToolCall({
|
|||
onExpandedChange={setParamsExpanded}
|
||||
/>
|
||||
{availableResult && (
|
||||
<div className={(paramsExpanded ? 'mt-4 ' : '') + 'flex flex-col gap-2 min-w-0'}>
|
||||
<div className={(paramsExpanded ? 'mt-4 ' : '') + 'flex flex-col gap-3 min-w-0'}>
|
||||
{imagePreviews.length > 0 && (
|
||||
<div className="flex flex-wrap gap-3">
|
||||
{imagePreviews.map((img, i) => {
|
||||
const src = img.url ? img.url : `data:${img.mimeType};base64,${img.dataBase64}`;
|
||||
const ext = img.mimeType === 'image/jpeg' ? 'jpg' : (img.mimeType === 'image/webp' ? 'webp' : 'png');
|
||||
const filename = `generated_image_${i + 1}.${ext}`;
|
||||
return (
|
||||
<div key={i} className="group relative rounded-lg p-2 bg-white dark:bg-zinc-900">
|
||||
<a
|
||||
href={src}
|
||||
download={filename}
|
||||
className="absolute bottom-3 right-3 opacity-0 group-hover:opacity-100 transition-opacity bg-white/80 dark:bg-zinc-900/80 rounded-md p-1 shadow hover:bg-white dark:hover:bg-zinc-800"
|
||||
aria-label="Download image"
|
||||
>
|
||||
<DownloadIcon size={16} className="text-gray-700 dark:text-gray-200" />
|
||||
</a>
|
||||
<img
|
||||
src={src}
|
||||
alt={`Tool image ${i+1}`}
|
||||
className="max-h-64 max-w-full object-contain rounded"
|
||||
/>
|
||||
{img.truncated && (
|
||||
<div className="text-[11px] text-amber-600 dark:text-amber-400 mt-1">
|
||||
Preview truncated to meet size limits.
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
<ExpandableContent
|
||||
label="Result"
|
||||
content={availableResult.content}
|
||||
|
|
@ -517,7 +613,25 @@ function ClientToolCall({
|
|||
onExpandedChange={setParamsExpanded}
|
||||
/>
|
||||
{availableResult && (
|
||||
<div className={(paramsExpanded ? 'mt-4 ' : '') + 'flex flex-col gap-2 w-full'}>
|
||||
<div className={(paramsExpanded ? 'mt-4 ' : '') + 'flex flex-col gap-3 w-full'}>
|
||||
{imagePreviews.length > 0 && (
|
||||
<div className="flex flex-wrap gap-3">
|
||||
{imagePreviews.map((img, i) => (
|
||||
<div key={i} className="rounded-lg border border-gray-200 dark:border-gray-700 p-2 bg-white dark:bg-zinc-900">
|
||||
<img
|
||||
src={img.url ? img.url : `data:${img.mimeType};base64,${img.dataBase64}`}
|
||||
alt={`Tool image ${i+1}`}
|
||||
className="max-h-64 max-w-full object-contain rounded"
|
||||
/>
|
||||
{img.truncated && (
|
||||
<div className="text-[11px] text-amber-600 dark:text-amber-400 mt-1">
|
||||
Preview truncated to meet size limits.
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
<ExpandableContent
|
||||
label="Result"
|
||||
content={availableResult.content}
|
||||
|
|
@ -757,6 +871,36 @@ export function Messages({
|
|||
}
|
||||
|
||||
// Finally, regular assistant messages
|
||||
// Attach images from the nearest preceding tool call and its corresponding tool result message
|
||||
const previews: { mimeType: string; url?: string; dataBase64?: string; truncated?: boolean }[] = [];
|
||||
for (let i = index - 1; i >= 0; i--) {
|
||||
const prev = messages[i] as any;
|
||||
if (prev && prev.role === 'assistant' && Array.isArray(prev.toolCalls)) {
|
||||
for (const tc of prev.toolCalls) {
|
||||
// Find the nearest tool result message after 'i' and before next assistant
|
||||
let resMsg: any = null;
|
||||
for (let j = i + 1; j < messages.length; j++) {
|
||||
const m = messages[j] as any;
|
||||
if (m.role === 'assistant') break; // stop at next assistant
|
||||
if (m.role === 'tool' && m.toolCallId === tc.id) { resMsg = m; break; }
|
||||
}
|
||||
if (!resMsg || typeof resMsg.content !== 'string') continue;
|
||||
try {
|
||||
const parsed = JSON.parse(resMsg.content);
|
||||
const imgs = Array.isArray(parsed?.images) ? parsed.images : [];
|
||||
for (const img of imgs) {
|
||||
if (typeof img?.url === 'string') {
|
||||
previews.push({ mimeType: img?.mimeType || 'image/png', url: img.url, truncated: Boolean(img?.truncated) });
|
||||
} else if (typeof img?.dataBase64 === 'string' && img.dataBase64.length > 0) {
|
||||
previews.push({ mimeType: img?.mimeType || 'image/png', dataBase64: img.dataBase64, truncated: Boolean(img?.truncated) });
|
||||
}
|
||||
}
|
||||
} catch { /* ignore */ }
|
||||
}
|
||||
if (previews.length > 0) break; // attach only the latest batch
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<AssistantMessage
|
||||
content={message.content ?? ''}
|
||||
|
|
@ -767,6 +911,7 @@ export function Messages({
|
|||
showDebugMessages={showDebugMessages}
|
||||
isFirstAssistant={isFirstAssistant}
|
||||
index={index}
|
||||
imagePreviews={previews}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
|
@ -810,4 +955,4 @@ export function Messages({
|
|||
|
||||
// Add a utility class for icon-with-label-on-hover
|
||||
const iconWithLabelClass = "group relative flex items-center gap-1 text-xs cursor-pointer hover:underline";
|
||||
const iconLabelClass = "absolute left-full ml-2 px-2 py-1 rounded bg-zinc-800 text-white text-xs opacity-0 group-hover:opacity-100 pointer-events-none whitespace-nowrap z-10";
|
||||
const iconLabelClass = "absolute left-full ml-2 px-2 py-1 rounded bg-zinc-800 text-white text-xs opacity-0 group-hover:opacity-100 pointer-events-none whitespace-nowrap z-10";
|
||||
|
|
|
|||
|
|
@ -6,6 +6,9 @@ import { z } from "zod";
|
|||
import { composio } from "@/src/application/lib/composio/composio";
|
||||
import { SignJWT } from "jose";
|
||||
import crypto from "crypto";
|
||||
import { GoogleGenerativeAI } from "@google/generative-ai";
|
||||
import { tempBinaryCache } from "@/src/application/services/temp-binary-cache";
|
||||
import { S3Client, PutObjectCommand } from "@aws-sdk/client-s3";
|
||||
|
||||
// Internal dependencies
|
||||
import { embeddingModel } from "@/app/lib/embedding";
|
||||
|
|
@ -31,6 +34,87 @@ const openai = createOpenAI({
|
|||
baseURL: PROVIDER_BASE_URL,
|
||||
});
|
||||
|
||||
// Gemini model used for image generation when the caller does not name one
const DEFAULT_IMAGE_MODEL = "gemini-2.5-flash-image-preview";

/**
 * Asks a Gemini model to generate content for `prompt` and splits the first
 * candidate's parts into text fragments and inline images.
 *
 * The API key is read from `GOOGLE_API_KEY`, falling back to `GEMINI_API_KEY`.
 * Token usage is reported to `usageTracker` on a best-effort basis.
 *
 * @param logger parent logger; a child named after this function is used
 * @param usageTracker receives one `LLM_USAGE` record per call
 * @param prompt text prompt passed to the model
 * @param options optional `modelName` override
 * @returns the text parts, the images (MIME type, decoded byte count and the
 *          base64 payload) and the model name that was used
 * @throws Error when no API key is set, or the response has no candidates or
 *         the first candidate has no parts
 */
export async function invokeGenerateImageTool(
    logger: PrefixLogger,
    usageTracker: UsageTracker,
    prompt: string,
    options?: {
        modelName?: string;
    }
): Promise<{
    texts: string[];
    images: { mimeType: string; bytes: number; dataBase64: string }[];
    model: string;
}> {
    const log = logger.child("invokeGenerateImageTool");

    const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || "";
    if (!apiKey) {
        throw new Error("Missing API key. Set GOOGLE_API_KEY or GEMINI_API_KEY.");
    }

    const modelName = options?.modelName || DEFAULT_IMAGE_MODEL;
    const model = new GoogleGenerativeAI(apiKey).getGenerativeModel({ model: modelName });

    log.log(`Generating image with model: ${modelName}`);
    const generation = await model.generateContent(prompt);
    const response = generation.response as any;

    // Usage reporting must never make the generation itself fail
    try {
        const usage = response?.usageMetadata;
        const inputTokens = usage?.promptTokenCount || 0;
        const outputTokens = usage?.candidatesTokenCount || 0;
        usageTracker.track({
            type: "LLM_USAGE",
            modelName: modelName,
            inputTokens,
            outputTokens,
            context: "agents_runtime.gemini_image_generation",
        });
    } catch (_) {
        // ignore usage tracking errors
    }

    const candidates = (response?.candidates ?? []) as any[];
    if (!candidates.length) {
        throw new Error("No candidates returned in response.");
    }

    // Only the first candidate is inspected
    const parts = (candidates[0]?.content?.parts ?? []) as any[];
    if (!parts.length) {
        throw new Error("No parts in candidate content.");
    }

    const texts: string[] = [];
    const images: { mimeType: string; bytes: number; dataBase64: string }[] = [];

    for (const part of parts) {
        const isText = typeof part.text === "string" && part.text.length;
        if (isText) {
            texts.push(part.text);
        } else {
            const encoded = part?.inlineData?.data as string | undefined;
            if (encoded) {
                const mimeType = part?.inlineData?.mimeType || "image/png";
                // Decoded only to report the real byte size of the image
                const decoded = Buffer.from(encoded, "base64");
                images.push({ mimeType, bytes: decoded.length, dataBase64: encoded });
            }
        }
    }

    if (!images.length) {
        log.log("No image part found in response.");
    }

    return { texts, images, model: modelName };
}
|
||||
|
||||
// Helper to handle mock tool responses
|
||||
export async function invokeMockTool(
|
||||
logger: PrefixLogger,
|
||||
|
|
@ -528,6 +612,108 @@ export function createComposioTool(
|
|||
});
|
||||
}
|
||||
|
||||
/** Maps an image MIME type to the file extension used for stored objects (`.png` by default). */
function generatedImageExtension(mimeType: string): string {
    if (mimeType === 'image/jpeg') return '.jpg';
    if (mimeType === 'image/webp') return '.webp';
    return '.png';
}

/**
 * Picks a fresh storage location for a generated image.
 *
 * The object name is `<projectId>-<uuid><ext>`; its last two characters before
 * the extension select the two shard directories. A UUID is used instead of
 * `Math.random()` so two images cannot end up under the same key, which would
 * silently overwrite an object that is served with an immutable cache header.
 *
 * @returns the S3 object key and the matching app-relative URL
 */
function buildGeneratedImageLocation(projectId: string, mimeType: string): { key: string; url: string } {
    const base = `${projectId}-${crypto.randomUUID()}`;
    const dirA = base.charAt(base.length - 2);
    const dirB = base.charAt(base.length - 1);
    const filename = `${base}${generatedImageExtension(mimeType)}`;
    return {
        key: `generated_images/${dirA}/${dirB}/${filename}`,
        url: `/api/generated-images/${dirA}/${dirB}/${filename}`,
    };
}

/**
 * Creates the Gemini image generation tool.
 *
 * On execution the tool generates images for `input.prompt` (optionally with
 * `input.modelName`) and returns a JSON string:
 * - when `UPLOADS_S3_BUCKET` is set, images are uploaded under
 *   `generated_images/<a>/<b>/<filename>` and referenced by
 *   `/api/generated-images/...` URLs (`storage: 's3'`);
 * - otherwise images are placed in the in-memory temp cache for 10 minutes and
 *   referenced by `/api/tmp-images/<id>` URLs (`storage: 'temp'`).
 *
 * Failures never throw out of the tool; they are logged and reported as
 * `{ error: ... }` in the returned JSON.
 */
export function createGenerateImageTool(
    logger: PrefixLogger,
    usageTracker: UsageTracker,
    config: z.infer<typeof WorkflowTool>,
    projectId: string,
): Tool {
    const { name, description, parameters } = config;

    return tool({
        name,
        description,
        strict: false,
        parameters: {
            type: 'object',
            properties: parameters.properties,
            required: parameters.required || [],
            additionalProperties: true,
        },
        async execute(input: any) {
            try {
                // Tool input is model-produced: accept the prompt only when it is a string
                const prompt: string = typeof input?.prompt === 'string' ? input.prompt : '';
                if (!prompt) {
                    return JSON.stringify({ error: "Missing required field: prompt" });
                }
                const modelName: string | undefined =
                    typeof input?.modelName === 'string' && input.modelName ? input.modelName : undefined;
                const result = await invokeGenerateImageTool(
                    logger,
                    usageTracker,
                    prompt,
                    { modelName }
                );

                // If an S3 bucket is configured, store the images there
                const s3Bucket = process.env.UPLOADS_S3_BUCKET || '';
                if (s3Bucket) {
                    const accessKeyId = process.env.AWS_ACCESS_KEY_ID;
                    const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY;
                    const s3 = new S3Client({
                        region: process.env.UPLOADS_AWS_REGION || 'us-east-1',
                        // Explicit keys only when both are present; otherwise the
                        // SDK's default credential resolution is used.
                        credentials: accessKeyId && secretAccessKey ? { accessKeyId, secretAccessKey } : undefined,
                    });

                    const images = await Promise.all(result.images.map(async (img) => {
                        const buf = Buffer.from(img.dataBase64, 'base64');
                        const { key, url } = buildGeneratedImageLocation(projectId, img.mimeType);
                        await s3.send(new PutObjectCommand({
                            Bucket: s3Bucket,
                            Key: key,
                            Body: buf,
                            ContentType: img.mimeType,
                        }));
                        return { mimeType: img.mimeType, bytes: buf.length, url };
                    }));
                    return JSON.stringify({
                        model: result.model,
                        texts: result.texts,
                        images,
                        storage: 's3',
                    });
                }

                // Otherwise, use in-memory temp cache URLs
                const ttlSec = 10 * 60; // 10 minutes
                const ttlMs = ttlSec * 1000;
                const images = result.images.map(img => {
                    try {
                        const buf = Buffer.from(img.dataBase64, 'base64');
                        const id = tempBinaryCache.put(buf, img.mimeType, ttlMs);
                        return { mimeType: img.mimeType, bytes: buf.length, url: `/api/tmp-images/${id}` as string | null };
                    } catch (error) {
                        // Keep the other images usable, but leave a trace of the failure
                        logger.log(`Failed to cache generated image for tool ${name}:`, error);
                        return { mimeType: img.mimeType, bytes: img.bytes, url: null };
                    }
                });
                return JSON.stringify({
                    model: result.model,
                    texts: result.texts,
                    images,
                    storage: 'temp',
                    expiresInSec: ttlSec,
                });
            } catch (error) {
                logger.log(`Error executing generate image tool ${name}:`, error);
                return JSON.stringify({
                    error: "Tool execution failed!",
                });
            }
        }
    });
}
|
||||
|
||||
export function createTools(
|
||||
logger: PrefixLogger,
|
||||
usageTracker: UsageTracker,
|
||||
|
|
@ -541,7 +727,7 @@ export function createTools(
|
|||
toolLogger.log(`=== CREATING ${Object.keys(toolConfig).length} TOOLS ===`);
|
||||
|
||||
for (const [toolName, config] of Object.entries(toolConfig)) {
|
||||
toolLogger.log(`creating tool: ${toolName} (type: ${config.mockTool ? 'mock' : config.isMcp ? 'mcp' : config.isComposio ? 'composio' : 'webhook'})`);
|
||||
toolLogger.log(`creating tool: ${toolName} (type: ${config.mockTool ? 'mock' : config.isMcp ? 'mcp' : config.isComposio ? 'composio' : config.isGeminiImage ? 'gemini-image' : 'webhook'})`);
|
||||
|
||||
if (config.mockTool) {
|
||||
tools[toolName] = createMockTool(logger, usageTracker, config);
|
||||
|
|
@ -552,6 +738,9 @@ export function createTools(
|
|||
} else if (config.isComposio) {
|
||||
tools[toolName] = createComposioTool(logger, usageTracker, config, projectId);
|
||||
toolLogger.log(`✓ created composio tool: ${toolName}`);
|
||||
} else if (config.isGeminiImage) {
|
||||
tools[toolName] = createGenerateImageTool(logger, usageTracker, config, projectId);
|
||||
toolLogger.log(`✓ created gemini image tool: ${toolName}`);
|
||||
} else if (config.isWebhook) {
|
||||
tools[toolName] = createWebhookTool(logger, usageTracker, config, projectId);
|
||||
toolLogger.log(`✓ created webhook tool: ${toolName} (fallback)`);
|
||||
|
|
@ -563,4 +752,4 @@ export function createTools(
|
|||
|
||||
toolLogger.log(`=== TOOL CREATION COMPLETE ===`);
|
||||
return tools;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
47
apps/rowboat/src/application/services/temp-binary-cache.ts
Normal file
47
apps/rowboat/src/application/services/temp-binary-cache.ts
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import crypto from 'crypto';
|
||||
|
||||
/** A cached binary payload together with its absolute expiry time. */
type Entry = {
    buf: Buffer;
    mimeType: string;
    expiresAt: number; // epoch ms
};

/** Lifetime applied when the caller gives no usable TTL: 10 minutes. */
const DEFAULT_TTL_MS = 10 * 60 * 1000;

/**
 * In-memory, process-local store for short-lived binary payloads.
 *
 * Entries are addressed by a random UUID and expire after a per-entry TTL.
 * Expired entries are dropped lazily on `get` and by a sweep that runs once a
 * minute; the sweep timer is unref'ed so it does not keep the process alive.
 */
class TempBinaryCache {
    private readonly store = new Map<string, Entry>();
    private cleanupInterval: NodeJS.Timeout | null = null;

    constructor() {
        this.startCleanup();
    }

    /** Starts the periodic sweep; a no-op when it is already running. */
    private startCleanup() {
        if (this.cleanupInterval) return;
        this.cleanupInterval = setInterval(() => {
            const now = Date.now();
            for (const [id, entry] of this.store.entries()) {
                if (entry.expiresAt <= now) this.store.delete(id);
            }
        }, 60_000); // every minute
        if (this.cleanupInterval.unref) this.cleanupInterval.unref();
    }

    /**
     * Stores `buf` and returns the id under which it can be fetched.
     *
     * @param buf payload to keep
     * @param mimeType MIME type returned alongside the payload
     * @param ttlMs lifetime in milliseconds; a non-finite value (NaN,
     *              Infinity) falls back to the default so the entry can
     *              always be evicted
     */
    put(buf: Buffer, mimeType: string, ttlMs: number = DEFAULT_TTL_MS): string {
        const id = crypto.randomUUID();
        const lifetime = Number.isFinite(ttlMs) ? ttlMs : DEFAULT_TTL_MS;
        const expiresAt = Date.now() + lifetime;
        this.store.set(id, { buf, mimeType, expiresAt });
        return id;
    }

    /**
     * Returns the payload stored under `id`, or `undefined` when the id is
     * unknown or the entry has expired (an expired entry is removed).
     */
    get(id: string): { buf: Buffer; mimeType: string } | undefined {
        const entry = this.store.get(id);
        if (!entry) return undefined;
        if (entry.expiresAt <= Date.now()) {
            this.store.delete(id);
            return undefined;
        }
        return { buf: entry.buf, mimeType: entry.mimeType };
    }
}

// The instance is anchored on globalThis so that every copy of this module
// (separately bundled server entry points, dev hot reloads) shares one store
// and one sweep timer; otherwise the code that writes an entry and the route
// that reads it could end up with different, mutually invisible caches.
const globalScope = globalThis as typeof globalThis & { __rowboatTempBinaryCache?: TempBinaryCache };

export const tempBinaryCache: TempBinaryCache =
    globalScope.__rowboatTempBinaryCache ?? (globalScope.__rowboatTempBinaryCache = new TempBinaryCache());
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue