mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-04-25 00:16:29 +02:00
make image description a server action
This commit is contained in:
parent
9b53e4d880
commit
2ef59f414d
3 changed files with 97 additions and 108 deletions
|
|
@ -4,6 +4,9 @@ import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
|
|||
import crypto from 'crypto';
|
||||
import { authCheck } from '@/app/actions/auth.actions';
|
||||
import { USE_AUTH } from '@/app/lib/feature_flags';
|
||||
import { GoogleGenerativeAI } from '@google/generative-ai';
|
||||
import { UsageTracker } from '@/app/lib/billing';
|
||||
import { logUsage } from '@/app/actions/billing.actions';
|
||||
|
||||
export async function getUploadUrlForImage(mimeType: string): Promise<{ id: string; key: string; uploadUrl: string; url: string; mimeType: string }> {
|
||||
// Enforce auth in server action context (supports guest mode when auth disabled)
|
||||
|
|
@ -46,3 +49,93 @@ export async function getUploadUrlForImage(mimeType: string): Promise<{ id: stri
|
|||
|
||||
return { id: idWithExt, key, uploadUrl, url: `/api/uploaded-images/${idWithExt}`, mimeType };
|
||||
}
|
||||
|
||||
/**
 * Server action: loads a previously uploaded image from S3 and asks Gemini for
 * a Markdown description of it.
 *
 * Description generation is best-effort: if no Gemini API key is configured, or
 * the Gemini call fails, the action still resolves with `description: null`.
 * Usage tracking and billing logging are best-effort as well; their failures
 * are logged and never fail the action.
 *
 * @param id - Image id including its extension (e.g. "<uuid>.png").
 * @returns The same `id` plus the generated Markdown description, or `null`
 *   when no description could be produced.
 * @throws Error when `id` is missing or not a string, when the S3 bucket is not
 *   configured, or when the object cannot be read from S3. When `USE_AUTH` is
 *   enabled, any error raised by `authCheck()` propagates as well.
 */
export async function describeUploadedImage(id: string): Promise<{ id: string; description: string | null }> {
    if (USE_AUTH) {
        await authCheck();
    }

    if (!id || typeof id !== 'string') {
        throw new Error('id is required');
    }

    const bucket = process.env.RAG_UPLOADS_S3_BUCKET;
    if (!bucket) {
        throw new Error('S3 bucket not configured');
    }

    // Explicit credentials are only passed when both halves are present;
    // otherwise the SDK client is constructed without a `credentials` option.
    const accessKeyId = process.env.AWS_ACCESS_KEY_ID;
    const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY;
    const s3 = new S3Client({
        region: process.env.RAG_UPLOADS_S3_REGION || 'us-east-1',
        credentials: accessKeyId && secretAccessKey ? { accessKeyId, secretAccessKey } : undefined,
    });

    // Objects are sharded into two directory levels using the last two
    // characters of the id with its extension stripped.
    const lastDot = id.lastIndexOf('.');
    const idWithoutExt = lastDot > 0 ? id.slice(0, lastDot) : id;
    const last2 = idWithoutExt.slice(-2).padStart(2, '0');
    const key = `uploaded_images/${last2.charAt(0)}/${last2.charAt(1)}/${id}`;

    // Fetch object bytes from S3
    const { GetObjectCommand } = await import('@aws-sdk/client-s3');
    const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
    if (!resp.Body) {
        throw new Error('S3 object has no body');
    }
    const contentType = resp.ContentType || 'application/octet-stream';
    // Let the SDK drain the stream: unlike hand-wired 'data'/'end'/'error'
    // listeners, this cannot hang if the stream closes prematurely.
    const buf = Buffer.from(await resp.Body.transformToByteArray());

    let descriptionMarkdown: string | null = null;
    const usageTracker = new UsageTracker();
    try {
        const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
        if (apiKey) {
            const genAI = new GoogleGenerativeAI(apiKey);
            const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
            const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';
            const result = await model.generateContent([
                { inlineData: { data: buf.toString('base64'), mimeType: contentType } },
                prompt,
            ]);
            // Structural view of just the fields read below, instead of `any`.
            const response: {
                text?: () => string;
                usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number };
            } = result.response;
            // `||` is deliberate: an empty description is reported as null.
            descriptionMarkdown = response.text?.() || null;

            // Track usage
            try {
                usageTracker.track({
                    type: 'LLM_USAGE',
                    modelName: 'gemini-2.5-flash',
                    inputTokens: response.usageMetadata?.promptTokenCount ?? 0,
                    outputTokens: response.usageMetadata?.candidatesTokenCount ?? 0,
                    context: 'uploaded_images.describe',
                });
            } catch (err) {
                // Best-effort: keep the description, but leave a trace.
                console.warn('Image description usage tracking failed', err);
            }
        }
    } catch (e) {
        console.warn('Gemini description failed', e);
    }

    // Log usage to billing
    try {
        const items = usageTracker.flush();
        if (items.length > 0) {
            await logUsage({ items });
        }
    } catch (err) {
        // Best-effort: billing problems must not fail the action.
        console.warn('Image description billing log failed', err);
    }

    return { id, description: descriptionMarkdown };
}
|
||||
|
|
|
|||
|
|
@ -1,99 +0,0 @@
|
|||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
|
||||
import { GoogleGenerativeAI } from '@google/generative-ai';
|
||||
import { UsageTracker } from '@/app/lib/billing';
|
||||
import { logUsage } from '@/app/actions/billing.actions';
|
||||
import { requireAuth } from '@/app/lib/auth';
|
||||
|
||||
/**
 * POST handler that loads an uploaded image from S3 and asks Gemini for a
 * Markdown description of it.
 *
 * Expects a JSON body of the form `{ id: string }`, where `id` includes the
 * file extension (e.g. "<uuid>.png").
 *
 * Responses:
 * - 200 `{ id, description }`: `description` is `null` when no Gemini API key
 *   is configured or the Gemini call fails (description is best-effort).
 * - 400 `{ error: 'id is required' }`: `id` missing or not a string.
 * - 500 `{ error: 'S3 bucket not configured' }`: bucket env var unset.
 * - 500 `{ error: 'Failed to describe' }`: any other failure (auth, request
 *   parsing, S3 read).
 */
export async function POST(request: NextRequest) {
    try {
        // Require authentication (handles guest mode internally when auth disabled)
        await requireAuth();

        const { id } = await request.json();
        if (!id || typeof id !== 'string') {
            return NextResponse.json({ error: 'id is required' }, { status: 400 });
        }

        const bucket = process.env.RAG_UPLOADS_S3_BUCKET;
        if (!bucket) {
            return NextResponse.json({ error: 'S3 bucket not configured' }, { status: 500 });
        }

        // Explicit credentials are only passed when both halves are present;
        // otherwise the SDK client is constructed without a `credentials` option.
        const accessKeyId = process.env.AWS_ACCESS_KEY_ID;
        const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY;
        const s3 = new S3Client({
            region: process.env.RAG_UPLOADS_S3_REGION || 'us-east-1',
            credentials: accessKeyId && secretAccessKey ? { accessKeyId, secretAccessKey } : undefined,
        });

        // `id` includes extension (e.g., "<uuid>.png"). Shard using the UUID part.
        const lastDot = id.lastIndexOf('.');
        const idWithoutExt = lastDot > 0 ? id.slice(0, lastDot) : id;
        const last2 = idWithoutExt.slice(-2).padStart(2, '0');
        const key = `uploaded_images/${last2.charAt(0)}/${last2.charAt(1)}/${id}`;

        const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
        if (!resp.Body) {
            // Reported to the client as the generic 500 by the outer catch.
            throw new Error('S3 object has no body');
        }
        const contentType = resp.ContentType || 'application/octet-stream';
        // Let the SDK drain the stream: unlike hand-wired 'data'/'end'/'error'
        // listeners, this cannot hang if the stream closes prematurely.
        const buf = Buffer.from(await resp.Body.transformToByteArray());

        let descriptionMarkdown: string | null = null;
        const usageTracker = new UsageTracker();
        try {
            const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
            if (apiKey) {
                const genAI = new GoogleGenerativeAI(apiKey);
                const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
                const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';
                const result = await model.generateContent([
                    { inlineData: { data: buf.toString('base64'), mimeType: contentType } },
                    prompt,
                ]);
                // Structural view of just the fields read below, instead of `any`.
                const response: {
                    text?: () => string;
                    usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number };
                } = result.response;
                // `||` is deliberate: an empty description is reported as null.
                descriptionMarkdown = response.text?.() || null;

                // Track usage similar to agents-runtime
                try {
                    usageTracker.track({
                        type: 'LLM_USAGE',
                        modelName: 'gemini-2.5-flash',
                        inputTokens: response.usageMetadata?.promptTokenCount ?? 0,
                        outputTokens: response.usageMetadata?.candidatesTokenCount ?? 0,
                        context: 'uploaded_images.describe',
                    });
                } catch (err) {
                    // Best-effort: keep the description, but leave a trace.
                    console.warn('Image description usage tracking failed', err);
                }
            }
        } catch (e) {
            console.warn('Gemini description failed', e);
        }

        // Log usage to billing if available
        try {
            const items = usageTracker.flush();
            if (items.length > 0) {
                await logUsage({ items });
            }
        } catch (err) {
            // Best-effort: billing problems must not fail the request.
            console.warn('Image description billing log failed', err);
        }

        return NextResponse.json({ id, description: descriptionMarkdown });
    } catch (e) {
        console.error('describe error', e);
        return NextResponse.json({ error: 'Failed to describe' }, { status: 500 });
    }
}
|
||||
|
|
@ -112,19 +112,14 @@ export function ComposeBoxPlayground({
|
|||
}
|
||||
|
||||
// 4) Ask server to generate description from S3 image
|
||||
const descRes = await fetch('/api/uploaded-images/describe', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ id: imageId }),
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (descRes.ok) {
|
||||
const descData = await descRes.json();
|
||||
try {
|
||||
const { describeUploadedImage } = await import('@/app/actions/uploaded-images.actions');
|
||||
const descData = await describeUploadedImage(imageId);
|
||||
const description: string | null = descData?.description ?? null;
|
||||
if (uploadAbortRef.current === controller) {
|
||||
setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description });
|
||||
}
|
||||
} else {
|
||||
} catch {
|
||||
// If description fails, still allow sending
|
||||
if (uploadAbortRef.current === controller) {
|
||||
setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description: null });
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue