mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-30 20:39:46 +02:00
make image description a server action
This commit is contained in:
parent
9b53e4d880
commit
2ef59f414d
3 changed files with 97 additions and 108 deletions
|
|
@ -4,6 +4,9 @@ import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
|
||||||
import crypto from 'crypto';
|
import crypto from 'crypto';
|
||||||
import { authCheck } from '@/app/actions/auth.actions';
|
import { authCheck } from '@/app/actions/auth.actions';
|
||||||
import { USE_AUTH } from '@/app/lib/feature_flags';
|
import { USE_AUTH } from '@/app/lib/feature_flags';
|
||||||
|
import { GoogleGenerativeAI } from '@google/generative-ai';
|
||||||
|
import { UsageTracker } from '@/app/lib/billing';
|
||||||
|
import { logUsage } from '@/app/actions/billing.actions';
|
||||||
|
|
||||||
export async function getUploadUrlForImage(mimeType: string): Promise<{ id: string; key: string; uploadUrl: string; url: string; mimeType: string }> {
|
export async function getUploadUrlForImage(mimeType: string): Promise<{ id: string; key: string; uploadUrl: string; url: string; mimeType: string }> {
|
||||||
// Enforce auth in server action context (supports guest mode when auth disabled)
|
// Enforce auth in server action context (supports guest mode when auth disabled)
|
||||||
|
|
@ -46,3 +49,93 @@ export async function getUploadUrlForImage(mimeType: string): Promise<{ id: stri
|
||||||
|
|
||||||
return { id: idWithExt, key, uploadUrl, url: `/api/uploaded-images/${idWithExt}`, mimeType };
|
return { id: idWithExt, key, uploadUrl, url: `/api/uploaded-images/${idWithExt}`, mimeType };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Server action: produce a Markdown description of a previously uploaded image.
 *
 * Steps:
 *  1. Enforce auth when `USE_AUTH` is enabled.
 *  2. Download the image bytes from the uploads S3 bucket.
 *  3. If a Gemini API key is configured, ask `gemini-2.5-flash` to describe the image.
 *  4. Report the token usage to billing.
 *
 * Steps 3 and 4 are best-effort: their failures are logged and the action still
 * resolves, with `description: null` when no description could be produced.
 *
 * @param id - Identifier of the uploaded image; it is used verbatim as the last
 *   segment of the S3 key (presumably `<uuid>.<ext>` - confirm against the upload action).
 * @returns The same `id` together with the generated description, or `null`.
 * @throws Error when `id` is missing or contains a path separator, when the S3
 *   bucket is not configured, or when the object cannot be read from S3.
 */
export async function describeUploadedImage(id: string): Promise<{ id: string; description: string | null }> {
    if (USE_AUTH) {
        await authCheck();
    }

    if (!id || typeof id !== 'string') {
        throw new Error('id is required');
    }
    // `id` arrives from the client and ends up inside the S3 key; refusing path
    // separators keeps it a single key segment under its shard prefix.
    if (/[\\/]/.test(id)) {
        throw new Error('id is invalid');
    }

    const bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
    if (!bucket) {
        throw new Error('S3 bucket not configured');
    }

    const region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1';
    const accessKeyId = process.env.AWS_ACCESS_KEY_ID;
    const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY;
    const s3 = new S3Client({
        region,
        // Explicit credentials only when both halves are present; otherwise
        // `undefined` leaves credential resolution to the SDK.
        credentials: accessKeyId && secretAccessKey ? { accessKeyId, secretAccessKey } : undefined,
    });

    // Fetch object bytes from S3
    const { GetObjectCommand } = await import('@aws-sdk/client-s3');
    const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: uploadedImageKey(id) }));
    const contentType = resp.ContentType || 'application/octet-stream';
    const buf = await readBodyBytes(resp.Body);

    const modelName = 'gemini-2.5-flash';
    let descriptionMarkdown: string | null = null;
    const usageTracker = new UsageTracker();
    try {
        const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
        if (apiKey) {
            const genAI = new GoogleGenerativeAI(apiKey);
            const model = genAI.getGenerativeModel({ model: modelName });
            const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';
            const result = await model.generateContent([
                { inlineData: { data: buf.toString('base64'), mimeType: contentType } },
                prompt,
            ]);
            // Only the two members used below are described; everything is
            // optional so an unexpected response shape degrades to null / 0.
            const response = result.response as {
                text?: () => string;
                usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number };
            };
            // An empty description is reported as null, like a missing one.
            descriptionMarkdown = response?.text?.() || null;

            // Track usage
            try {
                usageTracker.track({
                    type: 'LLM_USAGE',
                    modelName,
                    inputTokens: response?.usageMetadata?.promptTokenCount ?? 0,
                    outputTokens: response?.usageMetadata?.candidatesTokenCount ?? 0,
                    context: 'uploaded_images.describe',
                });
            } catch (e) {
                // Best-effort: never fail the description because tracking failed.
                console.warn('Usage tracking failed', e);
            }
        }
    } catch (e) {
        console.warn('Gemini description failed', e);
    }

    // Log usage to billing
    try {
        const items = usageTracker.flush();
        if (items.length > 0) {
            await logUsage({ items });
        }
    } catch (e) {
        // Best-effort: billing problems must not hide the description from the caller.
        console.warn('Billing usage logging failed', e);
    }

    return { id, description: descriptionMarkdown };
}

/**
 * Builds the S3 key for an uploaded image.
 * Objects are sharded into two directory levels named after the last two
 * characters of the id with its extension removed (left-padded with '0').
 */
function uploadedImageKey(id: string): string {
    const lastDot = id.lastIndexOf('.');
    const idWithoutExt = lastDot > 0 ? id.slice(0, lastDot) : id;
    const last2 = idWithoutExt.slice(-2).padStart(2, '0');
    return `uploaded_images/${last2.charAt(0)}/${last2.charAt(1)}/${id}`;
}

/**
 * Drains an S3 response body into a single Buffer.
 * Throws a descriptive Error when the body is absent or cannot be iterated
 * asynchronously (Node.js readable streams can).
 */
async function readBodyBytes(body: unknown): Promise<Buffer> {
    if (typeof body !== 'object' || body === null || !(Symbol.asyncIterator in body)) {
        throw new Error('S3 object has no readable body');
    }
    const chunks: Uint8Array[] = [];
    for await (const chunk of body as AsyncIterable<Uint8Array | string>) {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
    }
    return Buffer.concat(chunks);
}
|
||||||
|
|
|
||||||
|
|
@ -1,99 +0,0 @@
|
||||||
import { NextRequest, NextResponse } from 'next/server';
|
|
||||||
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
|
|
||||||
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
||||||
import { UsageTracker } from '@/app/lib/billing';
|
|
||||||
import { logUsage } from '@/app/actions/billing.actions';
|
|
||||||
import { requireAuth } from '@/app/lib/auth';
|
|
||||||
|
|
||||||
/**
 * HTTP POST handler that describes an uploaded image (the variant this commit
 * removes; presumably served at `/api/uploaded-images/describe`, the path the
 * client used to fetch).
 *
 * Expects a JSON body `{ id }`. Responds with `{ id, description }`, where
 * `description` is `null` when Gemini is not configured or its call failed.
 * Responds 400 when `id` is missing, and 500 when the bucket is not configured
 * or any other step throws.
 */
export async function POST(request: NextRequest) {
    try {
        // Auth first; per the original note, guest mode is handled inside requireAuth.
        await requireAuth();

        const { id } = await request.json();
        if (!id || typeof id !== 'string') {
            return NextResponse.json({ error: 'id is required' }, { status: 400 });
        }

        const bucketName = process.env.RAG_UPLOADS_S3_BUCKET || '';
        if (!bucketName) {
            return NextResponse.json({ error: 'S3 bucket not configured' }, { status: 500 });
        }

        const { AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY } = process.env;
        const client = new S3Client({
            region: process.env.RAG_UPLOADS_S3_REGION || 'us-east-1',
            credentials: AWS_ACCESS_KEY_ID && AWS_SECRET_ACCESS_KEY
                ? { accessKeyId: AWS_ACCESS_KEY_ID, secretAccessKey: AWS_SECRET_ACCESS_KEY }
                : undefined,
        });

        // `id` carries its extension (e.g. "<uuid>.png"); the two shard
        // directories come from the last two characters of the part before it.
        const dotAt = id.lastIndexOf('.');
        const stem = dotAt > 0 ? id.slice(0, dotAt) : id;
        const shard = stem.slice(-2).padStart(2, '0');
        const objectKey = ['uploaded_images', shard.charAt(0), shard.charAt(1), id].join('/');

        const object = await client.send(new GetObjectCommand({ Bucket: bucketName, Key: objectKey }));
        const mime = object.ContentType || 'application/octet-stream';

        // Drain the body stream into memory.
        const stream = object.Body as any;
        const parts: Uint8Array[] = [];
        await new Promise<void>((done, fail) => {
            stream.on('data', (part: Uint8Array) => parts.push(part));
            stream.on('end', () => done());
            stream.on('error', fail);
        });
        const imageBytes = Buffer.concat(parts);

        let description: string | null = null;
        const tracker = new UsageTracker();
        try {
            const geminiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
            if (geminiKey) {
                const model = new GoogleGenerativeAI(geminiKey).getGenerativeModel({ model: 'gemini-2.5-flash' });
                const instruction = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';
                const imagePart = { inlineData: { data: imageBytes.toString('base64'), mimeType: mime } };
                const generated = await model.generateContent([imagePart, instruction]);
                const reply: any = generated.response as any;
                description = reply?.text?.() || null;

                // Record token usage; a failure here must not affect the response.
                try {
                    tracker.track({
                        type: 'LLM_USAGE',
                        modelName: 'gemini-2.5-flash',
                        inputTokens: reply?.usageMetadata?.promptTokenCount || 0,
                        outputTokens: reply?.usageMetadata?.candidatesTokenCount || 0,
                        context: 'uploaded_images.describe',
                    });
                } catch {
                    // usage tracking errors are deliberately ignored
                }
            }
        } catch (err) {
            console.warn('Gemini description failed', err);
        }

        // Forward whatever usage was collected to billing.
        try {
            const usageItems = tracker.flush();
            if (usageItems.length > 0) {
                await logUsage({ items: usageItems });
            }
        } catch {
            // billing logging errors are deliberately ignored
        }

        return NextResponse.json({ id, description });
    } catch (err) {
        console.error('describe error', err);
        return NextResponse.json({ error: 'Failed to describe' }, { status: 500 });
    }
}
|
|
||||||
|
|
@ -112,19 +112,14 @@ export function ComposeBoxPlayground({
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4) Ask server to generate description from S3 image
|
// 4) Ask server to generate description from S3 image
|
||||||
const descRes = await fetch('/api/uploaded-images/describe', {
|
try {
|
||||||
method: 'POST',
|
const { describeUploadedImage } = await import('@/app/actions/uploaded-images.actions');
|
||||||
headers: { 'Content-Type': 'application/json' },
|
const descData = await describeUploadedImage(imageId);
|
||||||
body: JSON.stringify({ id: imageId }),
|
|
||||||
signal: controller.signal,
|
|
||||||
});
|
|
||||||
if (descRes.ok) {
|
|
||||||
const descData = await descRes.json();
|
|
||||||
const description: string | null = descData?.description ?? null;
|
const description: string | null = descData?.description ?? null;
|
||||||
if (uploadAbortRef.current === controller) {
|
if (uploadAbortRef.current === controller) {
|
||||||
setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description });
|
setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description });
|
||||||
}
|
}
|
||||||
} else {
|
} catch {
|
||||||
// If description fails, still allow sending
|
// If description fails, still allow sending
|
||||||
if (uploadAbortRef.current === controller) {
|
if (uploadAbortRef.current === controller) {
|
||||||
setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description: null });
|
setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description: null });
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue