diff --git a/apps/rowboat/app/actions/uploaded-images.actions.ts b/apps/rowboat/app/actions/uploaded-images.actions.ts index 733cb7e2..45ca6a68 100644 --- a/apps/rowboat/app/actions/uploaded-images.actions.ts +++ b/apps/rowboat/app/actions/uploaded-images.actions.ts @@ -4,6 +4,9 @@ import { getSignedUrl } from '@aws-sdk/s3-request-presigner'; import crypto from 'crypto'; import { authCheck } from '@/app/actions/auth.actions'; import { USE_AUTH } from '@/app/lib/feature_flags'; +import { GoogleGenerativeAI } from '@google/generative-ai'; +import { UsageTracker } from '@/app/lib/billing'; +import { logUsage } from '@/app/actions/billing.actions'; export async function getUploadUrlForImage(mimeType: string): Promise<{ id: string; key: string; uploadUrl: string; url: string; mimeType: string }> { // Enforce auth in server action context (supports guest mode when auth disabled) @@ -46,3 +49,93 @@ export async function getUploadUrlForImage(mimeType: string): Promise<{ id: stri return { id: idWithExt, key, uploadUrl, url: `/api/uploaded-images/${idWithExt}`, mimeType }; } + +export async function describeUploadedImage(id: string): Promise<{ id: string; description: string | null }> { + if (USE_AUTH) { + await authCheck(); + } + + if (!id || typeof id !== 'string') { + throw new Error('id is required'); + } + + const bucket = process.env.RAG_UPLOADS_S3_BUCKET || ''; + if (!bucket) { + throw new Error('S3 bucket not configured'); + } + + const region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1'; + const s3 = new S3Client({ + region, + credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? { + accessKeyId: process.env.AWS_ACCESS_KEY_ID as string, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY as string, + } : undefined, + }); + + const lastDot = id.lastIndexOf('.'); + const idWithoutExt = lastDot > 0 ? id.slice(0, lastDot) : id; + const last2 = idWithoutExt.slice(-2).padStart(2, '0'); + const dirA = last2.charAt(0); + const dirB = last2.charAt(1); + const key = `uploaded_images/${dirA}/${dirB}/${id}`; + + // Fetch object bytes from S3 + const { GetObjectCommand } = await import('@aws-sdk/client-s3'); + const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key })); + const contentType = resp.ContentType || 'application/octet-stream'; + const body = resp.Body as any; + const chunks: Uint8Array[] = []; + await new Promise((resolve, reject) => { + body.on('data', (c: Uint8Array) => chunks.push(c)); + body.on('end', () => resolve()); + body.on('error', reject); + }); + const buf = Buffer.concat(chunks); + + let descriptionMarkdown: string | null = null; + const usageTracker = new UsageTracker(); + try { + const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || ''; + if (apiKey) { + const genAI = new GoogleGenerativeAI(apiKey); + const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' }); + const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.'; + const result = await model.generateContent([ + { inlineData: { data: buf.toString('base64'), mimeType: contentType } }, + prompt, + ]); + const response: any = result.response as any; + descriptionMarkdown = response?.text?.() || null; + + // Track usage + try { + const inputTokens = response?.usageMetadata?.promptTokenCount || 0; + const outputTokens = response?.usageMetadata?.candidatesTokenCount || 0; + usageTracker.track({ + type: 'LLM_USAGE', + modelName: 'gemini-2.5-flash', + inputTokens, + outputTokens, + context: 'uploaded_images.describe', + }); + } catch { + // ignore + } + } + } catch (e) { + console.warn('Gemini description failed', e); + } + + // Log usage to billing + try { + const items = usageTracker.flush(); + if (items.length > 0) { + await logUsage({ items }); + } + } catch { + // ignore billing logging errors + } + + return { id, description: descriptionMarkdown }; +} diff --git a/apps/rowboat/app/api/uploaded-images/describe/route.ts b/apps/rowboat/app/api/uploaded-images/describe/route.ts deleted file mode 100644 index 246537e5..00000000 --- a/apps/rowboat/app/api/uploaded-images/describe/route.ts +++ /dev/null @@ -1,99 +0,0 @@ -import { NextRequest, NextResponse } from 'next/server'; -import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3'; -import { GoogleGenerativeAI } from '@google/generative-ai'; -import { UsageTracker } from '@/app/lib/billing'; -import { logUsage } from '@/app/actions/billing.actions'; -import { requireAuth } from '@/app/lib/auth'; - -export async function POST(request: NextRequest) { - try { - // Require authentication (handles guest mode internally when auth disabled) - await requireAuth(); - - const { id } = await request.json(); - if (!id || typeof id !== 'string') { - return NextResponse.json({ error: 'id is required' }, { status: 400 }); - } - - const bucket = process.env.RAG_UPLOADS_S3_BUCKET || ''; - if (!bucket) { - return NextResponse.json({ error: 'S3 bucket not configured' }, { status: 500 }); - } - - const region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1'; - const s3 = new S3Client({ - region, - credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? { - accessKeyId: process.env.AWS_ACCESS_KEY_ID as string, - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY as string, - } : undefined, - }); - - // `id` includes extension (e.g., ".png"). Shard using the UUID part. - const lastDot = id.lastIndexOf('.'); - const idWithoutExt = lastDot > 0 ? id.slice(0, lastDot) : id; - const last2 = idWithoutExt.slice(-2).padStart(2, '0'); - const dirA = last2.charAt(0); - const dirB = last2.charAt(1); - const key = `uploaded_images/${dirA}/${dirB}/${id}`; - const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key })); - const contentType = resp.ContentType || 'application/octet-stream'; - const body = resp.Body as any; - const chunks: Uint8Array[] = []; - await new Promise((resolve, reject) => { - body.on('data', (c: Uint8Array) => chunks.push(c)); - body.on('end', () => resolve()); - body.on('error', reject); - }); - const buf = Buffer.concat(chunks); - - let descriptionMarkdown: string | null = null; - const usageTracker = new UsageTracker(); - try { - const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || ''; - if (apiKey) { - const genAI = new GoogleGenerativeAI(apiKey); - const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' }); - const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.'; - const result = await model.generateContent([ - { inlineData: { data: buf.toString('base64'), mimeType: contentType } }, - prompt, - ]); - const response: any = result.response as any; - descriptionMarkdown = response?.text?.() || null; - - // Track usage similar to agents-runtime - try { - const inputTokens = response?.usageMetadata?.promptTokenCount || 0; - const outputTokens = response?.usageMetadata?.candidatesTokenCount || 0; - usageTracker.track({ - type: 'LLM_USAGE', - modelName: 'gemini-2.5-flash', - inputTokens, - outputTokens, - context: 'uploaded_images.describe', - }); - } catch (_) { - // ignore usage tracking errors - } - } - } catch (e) { - console.warn('Gemini description failed', e); - } - - // Log usage to billing if available - try { - const items = usageTracker.flush(); - if (items.length > 0) { - await logUsage({ items }); - } - } catch (_) { - // ignore billing logging errors - } - - return NextResponse.json({ id, description: descriptionMarkdown }); - } catch (e) { - console.error('describe error', e); - return NextResponse.json({ error: 'Failed to describe' }, { status: 500 }); - } -} diff --git a/apps/rowboat/components/common/compose-box-playground.tsx b/apps/rowboat/components/common/compose-box-playground.tsx index 3547a994..09723521 100644 --- a/apps/rowboat/components/common/compose-box-playground.tsx +++ b/apps/rowboat/components/common/compose-box-playground.tsx @@ -112,19 +112,14 @@ export function ComposeBoxPlayground({ } // 4) Ask server to generate description from S3 image - const descRes = await fetch('/api/uploaded-images/describe', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ id: imageId }), - signal: controller.signal, - }); - if (descRes.ok) { - const descData = await descRes.json(); + try { + const { describeUploadedImage } = await import('@/app/actions/uploaded-images.actions'); + const descData = await describeUploadedImage(imageId); const description: string | null = descData?.description ?? null; if (uploadAbortRef.current === controller) { setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description }); } - } else { + } catch { // If description fails, still allow sending if (uploadAbortRef.current === controller) { setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description: null });